1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "../src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatDistribute_MPIAIJ" 8 /* 9 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 10 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 11 12 Only for square matrices 13 */ 14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 15 { 16 PetscMPIInt rank,size; 17 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 18 PetscErrorCode ierr; 19 Mat mat; 20 Mat_SeqAIJ *gmata; 21 PetscMPIInt tag; 22 MPI_Status status; 23 PetscTruth aij; 24 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 25 26 PetscFunctionBegin; 27 CHKMEMQ; 28 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 29 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 30 if (!rank) { 31 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 32 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 33 } 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 36 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 37 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 38 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 39 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 40 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 41 rowners[0] = 0; 42 for (i=2; i<=size; i++) { 43 rowners[i] += rowners[i-1]; 44 } 45 rstart = rowners[rank]; 46 rend = rowners[rank+1]; 47 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 48 if (!rank) { 49 gmata = (Mat_SeqAIJ*) gmat->data; 50 /* send row lengths to all processors */ 51 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 52 for (i=1; i<size; i++) { 53 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 54 } 55 /* determine number diagonal and off-diagonal counts */ 56 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 58 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 59 jj = 0; 60 for (i=0; i<m; i++) { 61 for (j=0; j<dlens[i]; j++) { 62 if (gmata->j[jj] < rstart) ld[i]++; 63 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 64 jj++; 65 } 66 } 67 /* send column indices to other processes */ 68 for (i=1; i<size; i++) { 69 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 70 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 72 } 73 74 /* send numerical values to other processes */ 75 for (i=1; i<size; i++) { 76 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 77 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 78 } 79 gmataa = gmata->a; 80 gmataj = gmata->j; 81 82 } else { 83 /* receive row lengths */ 84 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 85 /* receive column indices */ 86 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 87 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 88 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 89 /* determine number diagonal and off-diagonal counts */ 90 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap->n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap->n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap->n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global column 178 number to the local number in the off-diagonal part of the local 179 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 180 a slightly higher hash table cost; without it it is not scalable (each processor 181 has an order N integer array but is fast to acess. 182 */ 183 #undef __FUNCT__ 184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 188 PetscErrorCode ierr; 189 PetscInt n = aij->B->cmap->n,i; 190 191 PetscFunctionBegin; 192 #if defined (PETSC_USE_CTABLE) 193 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 194 for (i=0; i<n; i++){ 195 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 196 } 197 #else 198 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 199 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 200 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 201 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 202 #endif 203 PetscFunctionReturn(0); 204 } 205 206 207 #define CHUNKSIZE 15 208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 209 { \ 210 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 211 lastcol1 = col;\ 212 while (high1-low1 > 5) { \ 213 t = (low1+high1)/2; \ 214 if (rp1[t] > col) high1 = t; \ 215 else low1 = t; \ 216 } \ 217 for (_i=low1; _i<high1; _i++) { \ 218 if (rp1[_i] > col) break; \ 219 if (rp1[_i] == col) { \ 220 if (addv == ADD_VALUES) ap1[_i] += value; \ 221 else ap1[_i] = value; \ 222 goto a_noinsert; \ 223 } \ 224 } \ 225 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 226 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 227 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 228 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 229 N = nrow1++ - 1; a->nz++; high1++; \ 230 /* shift up all the later entries in this row */ \ 231 for (ii=N; ii>=_i; ii--) { \ 232 rp1[ii+1] = rp1[ii]; \ 233 ap1[ii+1] = ap1[ii]; \ 234 } \ 235 rp1[_i] = col; \ 236 ap1[_i] = value; \ 237 a_noinsert: ; \ 238 ailen[row] = nrow1; \ 239 } 240 241 242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 243 { \ 244 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 245 lastcol2 = col;\ 246 while (high2-low2 > 5) { \ 247 t = (low2+high2)/2; \ 248 if (rp2[t] > col) high2 = t; \ 249 else low2 = t; \ 250 } \ 251 for (_i=low2; _i<high2; _i++) { \ 252 if (rp2[_i] > col) break; \ 253 if (rp2[_i] == col) { \ 254 if (addv == ADD_VALUES) ap2[_i] += value; \ 255 else ap2[_i] = value; \ 256 goto b_noinsert; \ 257 } \ 258 } \ 259 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 260 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 261 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 262 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 263 N = nrow2++ - 1; b->nz++; high2++; \ 264 /* shift up all the later entries in this row */ \ 265 for (ii=N; ii>=_i; ii--) { \ 266 rp2[ii+1] = rp2[ii]; \ 267 ap2[ii+1] = ap2[ii]; \ 268 } \ 269 rp2[_i] = col; \ 270 ap2[_i] = value; \ 271 b_noinsert: ; \ 272 bilen[row] = nrow2; \ 273 } 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 278 { 279 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 281 PetscErrorCode ierr; 282 PetscInt l,*garray = mat->garray,diag; 283 284 PetscFunctionBegin; 285 /* code only works for square matrices A */ 286 287 /* find size of row to the left of the diagonal part */ 288 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 289 row = row - diag; 290 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 291 if (garray[b->j[b->i[row]+l]] > diag) break; 292 } 293 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 294 295 /* diagonal part */ 296 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 297 298 /* right of diagonal part */ 299 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIAIJ" 305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 306 { 307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 308 PetscScalar value; 309 PetscErrorCode ierr; 310 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 311 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 312 PetscTruth roworiented = aij->roworiented; 313 314 /* Some Variables required in the macro */ 315 Mat A = aij->A; 316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 317 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 318 MatScalar *aa = a->a; 319 PetscTruth ignorezeroentries = a->ignorezeroentries; 320 Mat B = aij->B; 321 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 322 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 323 MatScalar *ba = b->a; 324 325 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 326 PetscInt nonew = a->nonew; 327 MatScalar *ap1,*ap2; 328 329 PetscFunctionBegin; 330 for (i=0; i<m; i++) { 331 if (im[i] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 334 #endif 335 if (im[i] >= rstart && im[i] < rend) { 336 row = im[i] - rstart; 337 lastcol1 = -1; 338 rp1 = aj + ai[row]; 339 ap1 = aa + ai[row]; 340 rmax1 = aimax[row]; 341 nrow1 = ailen[row]; 342 low1 = 0; 343 high1 = nrow1; 344 lastcol2 = -1; 345 rp2 = bj + bi[row]; 346 ap2 = ba + bi[row]; 347 rmax2 = bimax[row]; 348 nrow2 = bilen[row]; 349 low2 = 0; 350 high2 = nrow2; 351 352 for (j=0; j<n; j++) { 353 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 354 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 355 if (in[j] >= cstart && in[j] < cend){ 356 col = in[j] - cstart; 357 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 358 } else if (in[j] < 0) continue; 359 #if defined(PETSC_USE_DEBUG) 360 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 361 #endif 362 else { 363 if (mat->was_assembled) { 364 if (!aij->colmap) { 365 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 366 } 367 #if defined (PETSC_USE_CTABLE) 368 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 369 col--; 370 #else 371 col = aij->colmap[in[j]] - 1; 372 #endif 373 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 374 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 375 col = in[j]; 376 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 377 B = aij->B; 378 b = (Mat_SeqAIJ*)B->data; 379 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 380 rp2 = bj + bi[row]; 381 ap2 = ba + bi[row]; 382 rmax2 = bimax[row]; 383 nrow2 = bilen[row]; 384 low2 = 0; 385 high2 = nrow2; 386 bm = aij->B->rmap->n; 387 ba = b->a; 388 } 389 } else col = in[j]; 390 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 391 } 392 } 393 } else { 394 if (!aij->donotstash) { 395 if (roworiented) { 396 if (ignorezeroentries && v[i*n] == 0.0) continue; 397 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 398 } else { 399 if (ignorezeroentries && v[i] == 0.0) continue; 400 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 401 } 402 } 403 } 404 } 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatGetValues_MPIAIJ" 410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 411 { 412 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 413 PetscErrorCode ierr; 414 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 415 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 416 417 PetscFunctionBegin; 418 for (i=0; i<m; i++) { 419 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 420 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 421 if (idxm[i] >= rstart && idxm[i] < rend) { 422 row = idxm[i] - rstart; 423 for (j=0; j<n; j++) { 424 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 425 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 426 if (idxn[j] >= cstart && idxn[j] < cend){ 427 col = idxn[j] - cstart; 428 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 429 } else { 430 if (!aij->colmap) { 431 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 432 } 433 #if defined (PETSC_USE_CTABLE) 434 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 435 col --; 436 #else 437 col = aij->colmap[idxn[j]] - 1; 438 #endif 439 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 440 else { 441 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 442 } 443 } 444 } 445 } else { 446 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 447 } 448 } 449 PetscFunctionReturn(0); 450 } 451 452 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 456 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 457 { 458 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 459 PetscErrorCode ierr; 460 PetscInt nstash,reallocs; 461 InsertMode addv; 462 463 PetscFunctionBegin; 464 if (aij->donotstash) { 465 PetscFunctionReturn(0); 466 } 467 468 /* make sure all processors are either in INSERTMODE or ADDMODE */ 469 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 470 if (addv == (ADD_VALUES|INSERT_VALUES)) { 471 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 472 } 473 mat->insertmode = addv; /* in case this processor had no cache */ 474 475 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 476 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 477 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 478 PetscFunctionReturn(0); 479 } 480 481 #undef __FUNCT__ 482 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 483 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 484 { 485 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 486 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 487 PetscErrorCode ierr; 488 PetscMPIInt n; 489 PetscInt i,j,rstart,ncols,flg; 490 PetscInt *row,*col; 491 PetscTruth other_disassembled; 492 PetscScalar *val; 493 InsertMode addv = mat->insertmode; 494 495 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 496 PetscFunctionBegin; 497 if (!aij->donotstash) { 498 while (1) { 499 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 500 if (!flg) break; 501 502 for (i=0; i<n;) { 503 /* Now identify the consecutive vals belonging to the same row */ 504 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 505 if (j < n) ncols = j-i; 506 else ncols = n-i; 507 /* Now assemble all these values with a single function call */ 508 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 509 i = j; 510 } 511 } 512 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 513 } 514 a->compressedrow.use = PETSC_FALSE; 515 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 516 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 517 518 /* determine if any processor has disassembled, if so we must 519 also disassemble ourselfs, in order that we may reassemble. */ 520 /* 521 if nonzero structure of submatrix B cannot change then we know that 522 no processor disassembled thus we can skip this stuff 523 */ 524 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 525 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 526 if (mat->was_assembled && !other_disassembled) { 527 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 528 } 529 } 530 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 531 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 532 } 533 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 534 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 535 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 536 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 537 538 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 539 aij->rowvalues = 0; 540 541 /* used by MatAXPY() */ 542 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 543 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 544 545 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 546 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 547 PetscFunctionReturn(0); 548 } 549 550 #undef __FUNCT__ 551 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 552 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 553 { 554 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 555 PetscErrorCode ierr; 556 557 PetscFunctionBegin; 558 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 559 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 560 PetscFunctionReturn(0); 561 } 562 563 #undef __FUNCT__ 564 #define __FUNCT__ "MatZeroRows_MPIAIJ" 565 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 566 { 567 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 568 PetscErrorCode ierr; 569 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 570 PetscInt i,*owners = A->rmap->range; 571 PetscInt *nprocs,j,idx,nsends,row; 572 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 573 PetscInt *rvalues,count,base,slen,*source; 574 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 575 MPI_Comm comm = ((PetscObject)A)->comm; 576 MPI_Request *send_waits,*recv_waits; 577 MPI_Status recv_status,*send_status; 578 #if defined(PETSC_DEBUG) 579 PetscTruth found = PETSC_FALSE; 580 #endif 581 582 PetscFunctionBegin; 583 /* first count number of contributors to each processor */ 584 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 585 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 586 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 587 j = 0; 588 for (i=0; i<N; i++) { 589 if (lastidx > (idx = rows[i])) j = 0; 590 lastidx = idx; 591 for (; j<size; j++) { 592 if (idx >= owners[j] && idx < owners[j+1]) { 593 nprocs[2*j]++; 594 nprocs[2*j+1] = 1; 595 owner[i] = j; 596 #if defined(PETSC_DEBUG) 597 found = PETSC_TRUE; 598 #endif 599 break; 600 } 601 } 602 #if defined(PETSC_DEBUG) 603 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 604 found = PETSC_FALSE; 605 #endif 606 } 607 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 608 609 /* inform other processors of number of messages and max length*/ 610 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 611 612 /* post receives: */ 613 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 614 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 615 for (i=0; i<nrecvs; i++) { 616 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 617 } 618 619 /* do sends: 620 1) starts[i] gives the starting index in svalues for stuff going to 621 the ith processor 622 */ 623 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 624 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 625 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 626 starts[0] = 0; 627 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 628 for (i=0; i<N; i++) { 629 svalues[starts[owner[i]]++] = rows[i]; 630 } 631 632 starts[0] = 0; 633 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 634 count = 0; 635 for (i=0; i<size; i++) { 636 if (nprocs[2*i+1]) { 637 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 638 } 639 } 640 ierr = PetscFree(starts);CHKERRQ(ierr); 641 642 base = owners[rank]; 643 644 /* wait on receives */ 645 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 646 source = lens + nrecvs; 647 count = nrecvs; slen = 0; 648 while (count) { 649 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 650 /* unpack receives into our local space */ 651 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 652 source[imdex] = recv_status.MPI_SOURCE; 653 lens[imdex] = n; 654 slen += n; 655 count--; 656 } 657 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 658 659 /* move the data into the send scatter */ 660 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 661 count = 0; 662 for (i=0; i<nrecvs; i++) { 663 values = rvalues + i*nmax; 664 for (j=0; j<lens[i]; j++) { 665 lrows[count++] = values[j] - base; 666 } 667 } 668 ierr = PetscFree(rvalues);CHKERRQ(ierr); 669 ierr = PetscFree(lens);CHKERRQ(ierr); 670 ierr = PetscFree(owner);CHKERRQ(ierr); 671 ierr = PetscFree(nprocs);CHKERRQ(ierr); 672 673 /* actually zap the local rows */ 674 /* 675 Zero the required rows. If the "diagonal block" of the matrix 676 is square and the user wishes to set the diagonal we use separate 677 code so that MatSetValues() is not called for each diagonal allocating 678 new memory, thus calling lots of mallocs and slowing things down. 679 680 Contributed by: Matthew Knepley 681 */ 682 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 683 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 684 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 685 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 686 } else if (diag != 0.0) { 687 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 688 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 689 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 690 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 691 } 692 for (i = 0; i < slen; i++) { 693 row = lrows[i] + rstart; 694 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 695 } 696 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 697 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 698 } else { 699 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 700 } 701 ierr = PetscFree(lrows);CHKERRQ(ierr); 702 703 /* wait on sends */ 704 if (nsends) { 705 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 706 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 707 ierr = PetscFree(send_status);CHKERRQ(ierr); 708 } 709 ierr = PetscFree(send_waits);CHKERRQ(ierr); 710 ierr = PetscFree(svalues);CHKERRQ(ierr); 711 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatMult_MPIAIJ" 717 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 718 { 719 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 720 PetscErrorCode ierr; 721 PetscInt nt; 722 723 PetscFunctionBegin; 724 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 725 if (nt != A->cmap->n) { 726 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 727 } 728 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 729 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 730 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 731 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 732 PetscFunctionReturn(0); 733 } 734 735 #undef __FUNCT__ 736 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 737 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 738 { 739 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 740 PetscErrorCode ierr; 741 742 PetscFunctionBegin; 743 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 744 PetscFunctionReturn(0); 745 } 746 747 #undef __FUNCT__ 748 #define __FUNCT__ "MatMultAdd_MPIAIJ" 749 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 750 { 751 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 752 PetscErrorCode ierr; 753 754 PetscFunctionBegin; 755 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 756 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 757 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 758 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 759 PetscFunctionReturn(0); 760 } 761 762 #undef __FUNCT__ 763 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 764 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 765 { 766 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 767 PetscErrorCode ierr; 768 PetscTruth merged; 769 770 PetscFunctionBegin; 771 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 772 /* do nondiagonal part */ 773 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 774 if (!merged) { 775 /* send it on its way */ 776 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 777 /* do local part */ 778 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 779 /* receive remote parts: note this assumes the values are not actually */ 780 /* added in yy until the next line, */ 781 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 782 } else { 783 /* do local part */ 784 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 785 /* send it on its way */ 786 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 787 /* values actually were received in the Begin() but we need to call this nop */ 788 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 789 } 790 PetscFunctionReturn(0); 791 } 792 793 EXTERN_C_BEGIN 794 #undef __FUNCT__ 795 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 796 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 797 { 798 MPI_Comm comm; 799 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 800 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 801 IS Me,Notme; 802 PetscErrorCode ierr; 803 PetscInt M,N,first,last,*notme,i; 804 PetscMPIInt size; 805 806 PetscFunctionBegin; 807 808 /* Easy test: symmetric diagonal block */ 809 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 810 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 811 if (!*f) PetscFunctionReturn(0); 812 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 813 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 814 if (size == 1) PetscFunctionReturn(0); 815 816 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 817 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 818 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 819 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 820 for (i=0; i<first; i++) notme[i] = i; 821 for (i=last; i<M; i++) notme[i-last+first] = i; 822 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 823 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 824 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 825 Aoff = Aoffs[0]; 826 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 827 Boff = Boffs[0]; 828 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 829 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 830 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 831 ierr = ISDestroy(Me);CHKERRQ(ierr); 832 ierr = ISDestroy(Notme);CHKERRQ(ierr); 833 834 PetscFunctionReturn(0); 835 } 836 EXTERN_C_END 837 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 840 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 841 { 842 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 843 PetscErrorCode ierr; 844 845 PetscFunctionBegin; 846 /* do nondiagonal part */ 847 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 848 /* send it on its way */ 849 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 850 /* do local part */ 851 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 852 /* receive remote parts */ 853 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 854 PetscFunctionReturn(0); 855 } 856 857 /* 858 This only works correctly for square matrices where the subblock A->A is the 859 diagonal block 860 */ 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 863 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 864 { 865 PetscErrorCode ierr; 866 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 867 868 PetscFunctionBegin; 869 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 870 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) { 871 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 872 } 873 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 874 PetscFunctionReturn(0); 875 } 876 877 #undef __FUNCT__ 878 #define __FUNCT__ "MatScale_MPIAIJ" 879 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 880 { 881 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 882 PetscErrorCode ierr; 883 884 PetscFunctionBegin; 885 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 886 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 887 PetscFunctionReturn(0); 888 } 889 890 #undef __FUNCT__ 891 #define __FUNCT__ "MatDestroy_MPIAIJ" 892 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 893 { 894 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 895 PetscErrorCode ierr; 896 897 PetscFunctionBegin; 898 #if defined(PETSC_USE_LOG) 899 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 900 #endif 901 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 902 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 903 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 904 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 905 #if defined (PETSC_USE_CTABLE) 906 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 907 #else 908 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 909 #endif 910 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 911 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 912 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 913 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 914 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 915 ierr = PetscFree(aij);CHKERRQ(ierr); 916 917 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 918 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 919 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 920 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 921 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 922 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 923 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 924 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 925 PetscFunctionReturn(0); 926 } 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "MatView_MPIAIJ_Binary" 930 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 931 { 932 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 933 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 934 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 935 PetscErrorCode ierr; 936 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 937 int fd; 938 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 939 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 940 PetscScalar *column_values; 941 942 PetscFunctionBegin; 943 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 944 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 945 nz = A->nz + B->nz; 946 if (!rank) { 947 header[0] = MAT_FILE_COOKIE; 948 header[1] = mat->rmap->N; 949 header[2] = mat->cmap->N; 950 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 951 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 952 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 953 /* get largest number of rows any processor has */ 954 rlen = mat->rmap->n; 955 range = mat->rmap->range; 956 for (i=1; i<size; i++) { 957 rlen = PetscMax(rlen,range[i+1] - range[i]); 958 } 959 } else { 960 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 961 rlen = mat->rmap->n; 962 } 963 964 /* load up the local row counts */ 965 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 966 for (i=0; i<mat->rmap->n; i++) { 967 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 968 } 969 970 /* store the row lengths to the file */ 971 if (!rank) { 972 MPI_Status status; 973 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 974 for (i=1; i<size; i++) { 975 rlen = range[i+1] - range[i]; 976 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 977 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 978 } 979 } else { 980 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 981 } 982 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 983 984 /* load up the local column indices */ 985 nzmax = nz; /* )th processor needs space a largest processor needs */ 986 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 987 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 988 cnt = 0; 989 for (i=0; i<mat->rmap->n; i++) { 990 for (j=B->i[i]; j<B->i[i+1]; j++) { 991 if ( (col = garray[B->j[j]]) > cstart) break; 992 column_indices[cnt++] = col; 993 } 994 for (k=A->i[i]; k<A->i[i+1]; k++) { 995 column_indices[cnt++] = A->j[k] + cstart; 996 } 997 for (; j<B->i[i+1]; j++) { 998 column_indices[cnt++] = garray[B->j[j]]; 999 } 1000 } 1001 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1002 1003 /* store the column indices to the file */ 1004 if (!rank) { 1005 MPI_Status status; 1006 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1007 for (i=1; i<size; i++) { 1008 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1009 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1010 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1011 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1012 } 1013 } else { 1014 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1015 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1016 } 1017 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1018 1019 /* load up the local column values */ 1020 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1021 cnt = 0; 1022 for (i=0; i<mat->rmap->n; i++) { 1023 for (j=B->i[i]; j<B->i[i+1]; j++) { 1024 if ( garray[B->j[j]] > cstart) break; 1025 column_values[cnt++] = B->a[j]; 1026 } 1027 for (k=A->i[i]; k<A->i[i+1]; k++) { 1028 column_values[cnt++] = A->a[k]; 1029 } 1030 for (; j<B->i[i+1]; j++) { 1031 column_values[cnt++] = B->a[j]; 1032 } 1033 } 1034 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1035 1036 /* store the column values to the file */ 1037 if (!rank) { 1038 MPI_Status status; 1039 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1040 for (i=1; i<size; i++) { 1041 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1042 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1043 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1044 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1045 } 1046 } else { 1047 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1048 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1049 } 1050 ierr = PetscFree(column_values);CHKERRQ(ierr); 1051 PetscFunctionReturn(0); 1052 } 1053 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1056 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1057 { 1058 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1059 PetscErrorCode ierr; 1060 PetscMPIInt rank = aij->rank,size = aij->size; 1061 PetscTruth isdraw,iascii,isbinary; 1062 PetscViewer sviewer; 1063 PetscViewerFormat format; 1064 1065 PetscFunctionBegin; 1066 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1067 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1068 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1069 if (iascii) { 1070 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1071 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1072 MatInfo info; 1073 PetscTruth inodes; 1074 1075 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1076 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1077 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1078 if (!inodes) { 1079 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1080 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1081 } else { 1082 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1083 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1084 } 1085 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1086 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1087 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1088 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1089 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1090 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1091 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1092 PetscFunctionReturn(0); 1093 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1094 PetscInt inodecount,inodelimit,*inodes; 1095 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1096 if (inodes) { 1097 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1098 } else { 1099 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1100 } 1101 PetscFunctionReturn(0); 1102 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1103 PetscFunctionReturn(0); 1104 } 1105 } else if (isbinary) { 1106 if (size == 1) { 1107 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1108 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1109 } else { 1110 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1111 } 1112 PetscFunctionReturn(0); 1113 } else if (isdraw) { 1114 PetscDraw draw; 1115 PetscTruth isnull; 1116 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1117 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1118 } 1119 1120 if (size == 1) { 1121 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1122 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1123 } else { 1124 /* assemble the entire matrix onto first processor. */ 1125 Mat A; 1126 Mat_SeqAIJ *Aloc; 1127 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1128 MatScalar *a; 1129 1130 if (mat->rmap->N > 1024) { 1131 PetscTruth flg = PETSC_FALSE; 1132 1133 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1134 if (!flg) { 1135 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1136 } 1137 } 1138 1139 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1140 if (!rank) { 1141 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1142 } else { 1143 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1144 } 1145 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1146 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1147 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1148 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1149 1150 /* copy over the A part */ 1151 Aloc = (Mat_SeqAIJ*)aij->A->data; 1152 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1153 row = mat->rmap->rstart; 1154 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1155 for (i=0; i<m; i++) { 1156 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1157 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1158 } 1159 aj = Aloc->j; 1160 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1161 1162 /* copy over the B part */ 1163 Aloc = (Mat_SeqAIJ*)aij->B->data; 1164 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1165 row = mat->rmap->rstart; 1166 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1167 ct = cols; 1168 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1169 for (i=0; i<m; i++) { 1170 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1171 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1172 } 1173 ierr = PetscFree(ct);CHKERRQ(ierr); 1174 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1175 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1176 /* 1177 Everyone has to call to draw the matrix since the graphics waits are 1178 synchronized across all processors that share the PetscDraw object 1179 */ 1180 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1181 if (!rank) { 1182 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1183 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1184 } 1185 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1186 ierr = MatDestroy(A);CHKERRQ(ierr); 1187 } 1188 PetscFunctionReturn(0); 1189 } 1190 1191 #undef __FUNCT__ 1192 #define __FUNCT__ "MatView_MPIAIJ" 1193 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1194 { 1195 PetscErrorCode ierr; 1196 PetscTruth iascii,isdraw,issocket,isbinary; 1197 1198 PetscFunctionBegin; 1199 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1200 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1201 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1202 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1203 if (iascii || isdraw || isbinary || issocket) { 1204 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1205 } else { 1206 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1207 } 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatRelax_MPIAIJ" 1213 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1214 { 1215 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1216 PetscErrorCode ierr; 1217 Vec bb1 = 0; 1218 PetscTruth hasop; 1219 1220 PetscFunctionBegin; 1221 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1222 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1223 } 1224 1225 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1226 if (flag & SOR_ZERO_INITIAL_GUESS) { 1227 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1228 its--; 1229 } 1230 1231 while (its--) { 1232 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1233 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1234 1235 /* update rhs: bb1 = bb - B*x */ 1236 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1237 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1238 1239 /* local sweep */ 1240 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1241 } 1242 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1243 if (flag & SOR_ZERO_INITIAL_GUESS) { 1244 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1245 its--; 1246 } 1247 while (its--) { 1248 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1249 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1250 1251 /* update rhs: bb1 = bb - B*x */ 1252 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1253 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1254 1255 /* local sweep */ 1256 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1257 } 1258 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1259 if (flag & SOR_ZERO_INITIAL_GUESS) { 1260 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1261 its--; 1262 } 1263 while (its--) { 1264 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1266 1267 /* update rhs: bb1 = bb - B*x */ 1268 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1269 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1270 1271 /* local sweep */ 1272 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1273 } 1274 } else if (flag & SOR_EISENSTAT) { 1275 Vec xx1; 1276 1277 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1278 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1279 1280 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1281 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1282 if (!mat->diag) { 1283 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1284 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1285 } 1286 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1287 if (hasop) { 1288 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1289 } else { 1290 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1291 } 1292 ierr = VecWAXPY(bb1,-1.0,bb1,bb);CHKERRQ(ierr); 1293 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1294 1295 /* local sweep */ 1296 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1297 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1298 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1299 } else { 1300 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1301 } 1302 1303 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatPermute_MPIAIJ" 1309 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1310 { 1311 MPI_Comm comm,pcomm; 1312 PetscInt first,local_size,nrows; 1313 const PetscInt *rows; 1314 PetscMPIInt size; 1315 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1320 /* make a collective version of 'rowp' */ 1321 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1322 if (pcomm==comm) { 1323 crowp = rowp; 1324 } else { 1325 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1326 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1327 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1328 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1329 } 1330 /* collect the global row permutation and invert it */ 1331 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1332 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1333 if (pcomm!=comm) { 1334 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1335 } 1336 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1337 /* get the local target indices */ 1338 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1339 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1340 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1341 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1342 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1343 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1344 /* the column permutation is so much easier; 1345 make a local version of 'colp' and invert it */ 1346 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1347 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1348 if (size==1) { 1349 lcolp = colp; 1350 } else { 1351 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1352 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1353 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1354 } 1355 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1356 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1357 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1358 if (size>1) { 1359 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1360 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1361 } 1362 /* now we just get the submatrix */ 1363 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1364 /* clean up */ 1365 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1366 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1372 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1373 { 1374 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1375 Mat A = mat->A,B = mat->B; 1376 PetscErrorCode ierr; 1377 PetscReal isend[5],irecv[5]; 1378 1379 PetscFunctionBegin; 1380 info->block_size = 1.0; 1381 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1382 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1383 isend[3] = info->memory; isend[4] = info->mallocs; 1384 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1385 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1386 isend[3] += info->memory; isend[4] += info->mallocs; 1387 if (flag == MAT_LOCAL) { 1388 info->nz_used = isend[0]; 1389 info->nz_allocated = isend[1]; 1390 info->nz_unneeded = isend[2]; 1391 info->memory = isend[3]; 1392 info->mallocs = isend[4]; 1393 } else if (flag == MAT_GLOBAL_MAX) { 1394 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1395 info->nz_used = irecv[0]; 1396 info->nz_allocated = irecv[1]; 1397 info->nz_unneeded = irecv[2]; 1398 info->memory = irecv[3]; 1399 info->mallocs = irecv[4]; 1400 } else if (flag == MAT_GLOBAL_SUM) { 1401 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1402 info->nz_used = irecv[0]; 1403 info->nz_allocated = irecv[1]; 1404 info->nz_unneeded = irecv[2]; 1405 info->memory = irecv[3]; 1406 info->mallocs = irecv[4]; 1407 } 1408 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1409 info->fill_ratio_needed = 0; 1410 info->factor_mallocs = 0; 1411 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatSetOption_MPIAIJ" 1417 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1418 { 1419 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1420 PetscErrorCode ierr; 1421 1422 PetscFunctionBegin; 1423 switch (op) { 1424 case MAT_NEW_NONZERO_LOCATIONS: 1425 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1426 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1427 case MAT_KEEP_NONZERO_PATTERN: 1428 case MAT_NEW_NONZERO_LOCATION_ERR: 1429 case MAT_USE_INODES: 1430 case MAT_IGNORE_ZERO_ENTRIES: 1431 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1432 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1433 break; 1434 case MAT_ROW_ORIENTED: 1435 a->roworiented = flg; 1436 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1437 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1438 break; 1439 case MAT_NEW_DIAGONALS: 1440 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1441 break; 1442 case MAT_IGNORE_OFF_PROC_ENTRIES: 1443 a->donotstash = PETSC_TRUE; 1444 break; 1445 case MAT_SYMMETRIC: 1446 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1447 break; 1448 case MAT_STRUCTURALLY_SYMMETRIC: 1449 case MAT_HERMITIAN: 1450 case MAT_SYMMETRY_ETERNAL: 1451 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1452 break; 1453 default: 1454 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatGetRow_MPIAIJ" 1461 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1462 { 1463 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1464 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1465 PetscErrorCode ierr; 1466 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1467 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1468 PetscInt *cmap,*idx_p; 1469 1470 PetscFunctionBegin; 1471 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1472 mat->getrowactive = PETSC_TRUE; 1473 1474 if (!mat->rowvalues && (idx || v)) { 1475 /* 1476 allocate enough space to hold information from the longest row. 1477 */ 1478 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1479 PetscInt max = 1,tmp; 1480 for (i=0; i<matin->rmap->n; i++) { 1481 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1482 if (max < tmp) { max = tmp; } 1483 } 1484 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1485 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1486 } 1487 1488 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1489 lrow = row - rstart; 1490 1491 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1492 if (!v) {pvA = 0; pvB = 0;} 1493 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1494 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1495 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1496 nztot = nzA + nzB; 1497 1498 cmap = mat->garray; 1499 if (v || idx) { 1500 if (nztot) { 1501 /* Sort by increasing column numbers, assuming A and B already sorted */ 1502 PetscInt imark = -1; 1503 if (v) { 1504 *v = v_p = mat->rowvalues; 1505 for (i=0; i<nzB; i++) { 1506 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1507 else break; 1508 } 1509 imark = i; 1510 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1511 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1512 } 1513 if (idx) { 1514 *idx = idx_p = mat->rowindices; 1515 if (imark > -1) { 1516 for (i=0; i<imark; i++) { 1517 idx_p[i] = cmap[cworkB[i]]; 1518 } 1519 } else { 1520 for (i=0; i<nzB; i++) { 1521 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1522 else break; 1523 } 1524 imark = i; 1525 } 1526 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1527 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1528 } 1529 } else { 1530 if (idx) *idx = 0; 1531 if (v) *v = 0; 1532 } 1533 } 1534 *nz = nztot; 1535 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1536 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1537 PetscFunctionReturn(0); 1538 } 1539 1540 #undef __FUNCT__ 1541 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1542 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1543 { 1544 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1545 1546 PetscFunctionBegin; 1547 if (!aij->getrowactive) { 1548 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1549 } 1550 aij->getrowactive = PETSC_FALSE; 1551 PetscFunctionReturn(0); 1552 } 1553 1554 #undef __FUNCT__ 1555 #define __FUNCT__ "MatNorm_MPIAIJ" 1556 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1557 { 1558 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1559 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1560 PetscErrorCode ierr; 1561 PetscInt i,j,cstart = mat->cmap->rstart; 1562 PetscReal sum = 0.0; 1563 MatScalar *v; 1564 1565 PetscFunctionBegin; 1566 if (aij->size == 1) { 1567 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1568 } else { 1569 if (type == NORM_FROBENIUS) { 1570 v = amat->a; 1571 for (i=0; i<amat->nz; i++) { 1572 #if defined(PETSC_USE_COMPLEX) 1573 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1574 #else 1575 sum += (*v)*(*v); v++; 1576 #endif 1577 } 1578 v = bmat->a; 1579 for (i=0; i<bmat->nz; i++) { 1580 #if defined(PETSC_USE_COMPLEX) 1581 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1582 #else 1583 sum += (*v)*(*v); v++; 1584 #endif 1585 } 1586 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1587 *norm = sqrt(*norm); 1588 } else if (type == NORM_1) { /* max column norm */ 1589 PetscReal *tmp,*tmp2; 1590 PetscInt *jj,*garray = aij->garray; 1591 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1592 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1593 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1594 *norm = 0.0; 1595 v = amat->a; jj = amat->j; 1596 for (j=0; j<amat->nz; j++) { 1597 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1598 } 1599 v = bmat->a; jj = bmat->j; 1600 for (j=0; j<bmat->nz; j++) { 1601 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1602 } 1603 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1604 for (j=0; j<mat->cmap->N; j++) { 1605 if (tmp2[j] > *norm) *norm = tmp2[j]; 1606 } 1607 ierr = PetscFree(tmp);CHKERRQ(ierr); 1608 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1609 } else if (type == NORM_INFINITY) { /* max row norm */ 1610 PetscReal ntemp = 0.0; 1611 for (j=0; j<aij->A->rmap->n; j++) { 1612 v = amat->a + amat->i[j]; 1613 sum = 0.0; 1614 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1615 sum += PetscAbsScalar(*v); v++; 1616 } 1617 v = bmat->a + bmat->i[j]; 1618 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1619 sum += PetscAbsScalar(*v); v++; 1620 } 1621 if (sum > ntemp) ntemp = sum; 1622 } 1623 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1624 } else { 1625 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1626 } 1627 } 1628 PetscFunctionReturn(0); 1629 } 1630 1631 #undef __FUNCT__ 1632 #define __FUNCT__ "MatTranspose_MPIAIJ" 1633 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1634 { 1635 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1636 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1637 PetscErrorCode ierr; 1638 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1639 PetscInt cstart=A->cmap->rstart,ncol; 1640 Mat B; 1641 MatScalar *array; 1642 1643 PetscFunctionBegin; 1644 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1645 1646 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1647 ai = Aloc->i; aj = Aloc->j; 1648 bi = Bloc->i; bj = Bloc->j; 1649 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1650 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1651 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1652 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1653 for (i=0; i<ai[ma]; i++){ 1654 d_nnz[aj[i]] ++; 1655 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1656 } 1657 1658 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1659 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1660 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1661 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1662 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1663 } else { 1664 B = *matout; 1665 } 1666 1667 /* copy over the A part */ 1668 array = Aloc->a; 1669 row = A->rmap->rstart; 1670 for (i=0; i<ma; i++) { 1671 ncol = ai[i+1]-ai[i]; 1672 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1673 row++; array += ncol; aj += ncol; 1674 } 1675 aj = Aloc->j; 1676 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1677 1678 /* copy over the B part */ 1679 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1680 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1681 array = Bloc->a; 1682 row = A->rmap->rstart; 1683 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1684 cols_tmp = cols; 1685 for (i=0; i<mb; i++) { 1686 ncol = bi[i+1]-bi[i]; 1687 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1688 row++; array += ncol; cols_tmp += ncol; 1689 } 1690 ierr = PetscFree(cols);CHKERRQ(ierr); 1691 1692 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1693 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1694 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1695 *matout = B; 1696 } else { 1697 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1698 } 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1704 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1705 { 1706 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1707 Mat a = aij->A,b = aij->B; 1708 PetscErrorCode ierr; 1709 PetscInt s1,s2,s3; 1710 1711 PetscFunctionBegin; 1712 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1713 if (rr) { 1714 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1715 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1716 /* Overlap communication with computation. */ 1717 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1718 } 1719 if (ll) { 1720 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1721 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1722 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1723 } 1724 /* scale the diagonal block */ 1725 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1726 1727 if (rr) { 1728 /* Do a scatter end and then right scale the off-diagonal block */ 1729 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1730 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1731 } 1732 1733 PetscFunctionReturn(0); 1734 } 1735 1736 #undef __FUNCT__ 1737 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1738 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1739 { 1740 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1741 PetscErrorCode ierr; 1742 1743 PetscFunctionBegin; 1744 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1745 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1746 PetscFunctionReturn(0); 1747 } 1748 #undef __FUNCT__ 1749 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1750 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1751 { 1752 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1753 PetscErrorCode ierr; 1754 1755 PetscFunctionBegin; 1756 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1757 PetscFunctionReturn(0); 1758 } 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatEqual_MPIAIJ" 1762 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1763 { 1764 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1765 Mat a,b,c,d; 1766 PetscTruth flg; 1767 PetscErrorCode ierr; 1768 1769 PetscFunctionBegin; 1770 a = matA->A; b = matA->B; 1771 c = matB->A; d = matB->B; 1772 1773 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1774 if (flg) { 1775 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1776 } 1777 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1778 PetscFunctionReturn(0); 1779 } 1780 1781 #undef __FUNCT__ 1782 #define __FUNCT__ "MatCopy_MPIAIJ" 1783 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1784 { 1785 PetscErrorCode ierr; 1786 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1787 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1788 1789 PetscFunctionBegin; 1790 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1791 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1792 /* because of the column compression in the off-processor part of the matrix a->B, 1793 the number of columns in a->B and b->B may be different, hence we cannot call 1794 the MatCopy() directly on the two parts. If need be, we can provide a more 1795 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1796 then copying the submatrices */ 1797 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1798 } else { 1799 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1800 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1801 } 1802 PetscFunctionReturn(0); 1803 } 1804 1805 #undef __FUNCT__ 1806 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1807 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1808 { 1809 PetscErrorCode ierr; 1810 1811 PetscFunctionBegin; 1812 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1813 PetscFunctionReturn(0); 1814 } 1815 1816 #include "petscblaslapack.h" 1817 #undef __FUNCT__ 1818 #define __FUNCT__ "MatAXPY_MPIAIJ" 1819 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1820 { 1821 PetscErrorCode ierr; 1822 PetscInt i; 1823 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1824 PetscBLASInt bnz,one=1; 1825 Mat_SeqAIJ *x,*y; 1826 1827 PetscFunctionBegin; 1828 if (str == SAME_NONZERO_PATTERN) { 1829 PetscScalar alpha = a; 1830 x = (Mat_SeqAIJ *)xx->A->data; 1831 y = (Mat_SeqAIJ *)yy->A->data; 1832 bnz = PetscBLASIntCast(x->nz); 1833 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1834 x = (Mat_SeqAIJ *)xx->B->data; 1835 y = (Mat_SeqAIJ *)yy->B->data; 1836 bnz = PetscBLASIntCast(x->nz); 1837 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1838 } else if (str == SUBSET_NONZERO_PATTERN) { 1839 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1840 1841 x = (Mat_SeqAIJ *)xx->B->data; 1842 y = (Mat_SeqAIJ *)yy->B->data; 1843 if (y->xtoy && y->XtoY != xx->B) { 1844 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1845 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1846 } 1847 if (!y->xtoy) { /* get xtoy */ 1848 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1849 y->XtoY = xx->B; 1850 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1851 } 1852 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1853 } else { 1854 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1855 } 1856 PetscFunctionReturn(0); 1857 } 1858 1859 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1860 1861 #undef __FUNCT__ 1862 #define __FUNCT__ "MatConjugate_MPIAIJ" 1863 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1864 { 1865 #if defined(PETSC_USE_COMPLEX) 1866 PetscErrorCode ierr; 1867 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1868 1869 PetscFunctionBegin; 1870 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1871 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1872 #else 1873 PetscFunctionBegin; 1874 #endif 1875 PetscFunctionReturn(0); 1876 } 1877 1878 #undef __FUNCT__ 1879 #define __FUNCT__ "MatRealPart_MPIAIJ" 1880 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1881 { 1882 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1883 PetscErrorCode ierr; 1884 1885 PetscFunctionBegin; 1886 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1887 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1888 PetscFunctionReturn(0); 1889 } 1890 1891 #undef __FUNCT__ 1892 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1893 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1894 { 1895 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1896 PetscErrorCode ierr; 1897 1898 PetscFunctionBegin; 1899 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1900 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1901 PetscFunctionReturn(0); 1902 } 1903 1904 #ifdef PETSC_HAVE_PBGL 1905 1906 #include <boost/parallel/mpi/bsp_process_group.hpp> 1907 #include <boost/graph/distributed/ilu_default_graph.hpp> 1908 #include <boost/graph/distributed/ilu_0_block.hpp> 1909 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1910 #include <boost/graph/distributed/petsc/interface.hpp> 1911 #include <boost/multi_array.hpp> 1912 #include <boost/parallel/distributed_property_map->hpp> 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1916 /* 1917 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1918 */ 1919 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1920 { 1921 namespace petsc = boost::distributed::petsc; 1922 1923 namespace graph_dist = boost::graph::distributed; 1924 using boost::graph::distributed::ilu_default::process_group_type; 1925 using boost::graph::ilu_permuted; 1926 1927 PetscTruth row_identity, col_identity; 1928 PetscContainer c; 1929 PetscInt m, n, M, N; 1930 PetscErrorCode ierr; 1931 1932 PetscFunctionBegin; 1933 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1934 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1935 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1936 if (!row_identity || !col_identity) { 1937 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1938 } 1939 1940 process_group_type pg; 1941 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1942 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1943 lgraph_type& level_graph = *lgraph_p; 1944 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1945 1946 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1947 ilu_permuted(level_graph); 1948 1949 /* put together the new matrix */ 1950 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1951 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1952 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1953 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1954 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1955 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1956 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1957 1958 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1959 ierr = PetscContainerSetPointer(c, lgraph_p); 1960 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1961 PetscFunctionReturn(0); 1962 } 1963 1964 #undef __FUNCT__ 1965 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1966 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1967 { 1968 PetscFunctionBegin; 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatSolve_MPIAIJ" 1974 /* 1975 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1976 */ 1977 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1978 { 1979 namespace graph_dist = boost::graph::distributed; 1980 1981 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1982 lgraph_type* lgraph_p; 1983 PetscContainer c; 1984 PetscErrorCode ierr; 1985 1986 PetscFunctionBegin; 1987 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1988 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1989 ierr = VecCopy(b, x);CHKERRQ(ierr); 1990 1991 PetscScalar* array_x; 1992 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1993 PetscInt sx; 1994 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1995 1996 PetscScalar* array_b; 1997 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1998 PetscInt sb; 1999 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2000 2001 lgraph_type& level_graph = *lgraph_p; 2002 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2003 2004 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2005 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2006 ref_x(array_x, boost::extents[num_vertices(graph)]); 2007 2008 typedef boost::iterator_property_map<array_ref_type::iterator, 2009 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2010 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2011 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2012 2013 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2014 2015 PetscFunctionReturn(0); 2016 } 2017 #endif 2018 2019 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2020 PetscInt nzlocal,nsends,nrecvs; 2021 PetscMPIInt *send_rank; 2022 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 2023 PetscScalar *sbuf_a,**rbuf_a; 2024 PetscErrorCode (*MatDestroy)(Mat); 2025 } Mat_Redundant; 2026 2027 #undef __FUNCT__ 2028 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2029 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2030 { 2031 PetscErrorCode ierr; 2032 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2033 PetscInt i; 2034 2035 PetscFunctionBegin; 2036 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 2037 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2038 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2039 for (i=0; i<redund->nrecvs; i++){ 2040 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2041 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2042 } 2043 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2044 ierr = PetscFree(redund);CHKERRQ(ierr); 2045 PetscFunctionReturn(0); 2046 } 2047 2048 #undef __FUNCT__ 2049 #define __FUNCT__ "MatDestroy_MatRedundant" 2050 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2051 { 2052 PetscErrorCode ierr; 2053 PetscContainer container; 2054 Mat_Redundant *redund=PETSC_NULL; 2055 2056 PetscFunctionBegin; 2057 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2058 if (container) { 2059 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2060 } else { 2061 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2062 } 2063 A->ops->destroy = redund->MatDestroy; 2064 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2065 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2066 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2067 PetscFunctionReturn(0); 2068 } 2069 2070 #undef __FUNCT__ 2071 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2072 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2073 { 2074 PetscMPIInt rank,size; 2075 MPI_Comm comm=((PetscObject)mat)->comm; 2076 PetscErrorCode ierr; 2077 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2078 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2079 PetscInt *rowrange=mat->rmap->range; 2080 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2081 Mat A=aij->A,B=aij->B,C=*matredundant; 2082 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2083 PetscScalar *sbuf_a; 2084 PetscInt nzlocal=a->nz+b->nz; 2085 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2086 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2087 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2088 MatScalar *aworkA,*aworkB; 2089 PetscScalar *vals; 2090 PetscMPIInt tag1,tag2,tag3,imdex; 2091 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2092 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2093 MPI_Status recv_status,*send_status; 2094 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2095 PetscInt **rbuf_j=PETSC_NULL; 2096 PetscScalar **rbuf_a=PETSC_NULL; 2097 Mat_Redundant *redund=PETSC_NULL; 2098 PetscContainer container; 2099 2100 PetscFunctionBegin; 2101 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2102 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2103 2104 if (reuse == MAT_REUSE_MATRIX) { 2105 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2106 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2107 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2108 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2109 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2110 if (container) { 2111 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2112 } else { 2113 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2114 } 2115 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2116 2117 nsends = redund->nsends; 2118 nrecvs = redund->nrecvs; 2119 send_rank = redund->send_rank; recv_rank = send_rank + size; 2120 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2121 sbuf_j = redund->sbuf_j; 2122 sbuf_a = redund->sbuf_a; 2123 rbuf_j = redund->rbuf_j; 2124 rbuf_a = redund->rbuf_a; 2125 } 2126 2127 if (reuse == MAT_INITIAL_MATRIX){ 2128 PetscMPIInt subrank,subsize; 2129 PetscInt nleftover,np_subcomm; 2130 /* get the destination processors' id send_rank, nsends and nrecvs */ 2131 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2132 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2133 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2134 recv_rank = send_rank + size; 2135 np_subcomm = size/nsubcomm; 2136 nleftover = size - nsubcomm*np_subcomm; 2137 nsends = 0; nrecvs = 0; 2138 for (i=0; i<size; i++){ /* i=rank*/ 2139 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2140 send_rank[nsends] = i; nsends++; 2141 recv_rank[nrecvs++] = i; 2142 } 2143 } 2144 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2145 i = size-nleftover-1; 2146 j = 0; 2147 while (j < nsubcomm - nleftover){ 2148 send_rank[nsends++] = i; 2149 i--; j++; 2150 } 2151 } 2152 2153 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2154 for (i=0; i<nleftover; i++){ 2155 recv_rank[nrecvs++] = size-nleftover+i; 2156 } 2157 } 2158 2159 /* allocate sbuf_j, sbuf_a */ 2160 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2161 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2162 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2163 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2164 2165 /* copy mat's local entries into the buffers */ 2166 if (reuse == MAT_INITIAL_MATRIX){ 2167 rownz_max = 0; 2168 rptr = sbuf_j; 2169 cols = sbuf_j + rend-rstart + 1; 2170 vals = sbuf_a; 2171 rptr[0] = 0; 2172 for (i=0; i<rend-rstart; i++){ 2173 row = i + rstart; 2174 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2175 ncols = nzA + nzB; 2176 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2177 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2178 /* load the column indices for this row into cols */ 2179 lwrite = 0; 2180 for (l=0; l<nzB; l++) { 2181 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2182 vals[lwrite] = aworkB[l]; 2183 cols[lwrite++] = ctmp; 2184 } 2185 } 2186 for (l=0; l<nzA; l++){ 2187 vals[lwrite] = aworkA[l]; 2188 cols[lwrite++] = cstart + cworkA[l]; 2189 } 2190 for (l=0; l<nzB; l++) { 2191 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2192 vals[lwrite] = aworkB[l]; 2193 cols[lwrite++] = ctmp; 2194 } 2195 } 2196 vals += ncols; 2197 cols += ncols; 2198 rptr[i+1] = rptr[i] + ncols; 2199 if (rownz_max < ncols) rownz_max = ncols; 2200 } 2201 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2202 } else { /* only copy matrix values into sbuf_a */ 2203 rptr = sbuf_j; 2204 vals = sbuf_a; 2205 rptr[0] = 0; 2206 for (i=0; i<rend-rstart; i++){ 2207 row = i + rstart; 2208 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2209 ncols = nzA + nzB; 2210 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2211 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2212 lwrite = 0; 2213 for (l=0; l<nzB; l++) { 2214 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2215 } 2216 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2217 for (l=0; l<nzB; l++) { 2218 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2219 } 2220 vals += ncols; 2221 rptr[i+1] = rptr[i] + ncols; 2222 } 2223 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2224 2225 /* send nzlocal to others, and recv other's nzlocal */ 2226 /*--------------------------------------------------*/ 2227 if (reuse == MAT_INITIAL_MATRIX){ 2228 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2229 s_waits2 = s_waits3 + nsends; 2230 s_waits1 = s_waits2 + nsends; 2231 r_waits1 = s_waits1 + nsends; 2232 r_waits2 = r_waits1 + nrecvs; 2233 r_waits3 = r_waits2 + nrecvs; 2234 } else { 2235 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2236 r_waits3 = s_waits3 + nsends; 2237 } 2238 2239 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2240 if (reuse == MAT_INITIAL_MATRIX){ 2241 /* get new tags to keep the communication clean */ 2242 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2243 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2244 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2245 rbuf_nz = sbuf_nz + nsends; 2246 2247 /* post receives of other's nzlocal */ 2248 for (i=0; i<nrecvs; i++){ 2249 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2250 } 2251 /* send nzlocal to others */ 2252 for (i=0; i<nsends; i++){ 2253 sbuf_nz[i] = nzlocal; 2254 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2255 } 2256 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2257 count = nrecvs; 2258 while (count) { 2259 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2260 recv_rank[imdex] = recv_status.MPI_SOURCE; 2261 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2262 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2263 2264 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2265 rbuf_nz[imdex] += i + 2; 2266 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2267 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2268 count--; 2269 } 2270 /* wait on sends of nzlocal */ 2271 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2272 /* send mat->i,j to others, and recv from other's */ 2273 /*------------------------------------------------*/ 2274 for (i=0; i<nsends; i++){ 2275 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2276 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2277 } 2278 /* wait on receives of mat->i,j */ 2279 /*------------------------------*/ 2280 count = nrecvs; 2281 while (count) { 2282 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2283 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2284 count--; 2285 } 2286 /* wait on sends of mat->i,j */ 2287 /*---------------------------*/ 2288 if (nsends) { 2289 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2290 } 2291 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2292 2293 /* post receives, send and receive mat->a */ 2294 /*----------------------------------------*/ 2295 for (imdex=0; imdex<nrecvs; imdex++) { 2296 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2297 } 2298 for (i=0; i<nsends; i++){ 2299 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2300 } 2301 count = nrecvs; 2302 while (count) { 2303 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2304 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2305 count--; 2306 } 2307 if (nsends) { 2308 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2309 } 2310 2311 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2312 2313 /* create redundant matrix */ 2314 /*-------------------------*/ 2315 if (reuse == MAT_INITIAL_MATRIX){ 2316 /* compute rownz_max for preallocation */ 2317 for (imdex=0; imdex<nrecvs; imdex++){ 2318 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2319 rptr = rbuf_j[imdex]; 2320 for (i=0; i<j; i++){ 2321 ncols = rptr[i+1] - rptr[i]; 2322 if (rownz_max < ncols) rownz_max = ncols; 2323 } 2324 } 2325 2326 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2327 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2328 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2329 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2330 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2331 } else { 2332 C = *matredundant; 2333 } 2334 2335 /* insert local matrix entries */ 2336 rptr = sbuf_j; 2337 cols = sbuf_j + rend-rstart + 1; 2338 vals = sbuf_a; 2339 for (i=0; i<rend-rstart; i++){ 2340 row = i + rstart; 2341 ncols = rptr[i+1] - rptr[i]; 2342 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2343 vals += ncols; 2344 cols += ncols; 2345 } 2346 /* insert received matrix entries */ 2347 for (imdex=0; imdex<nrecvs; imdex++){ 2348 rstart = rowrange[recv_rank[imdex]]; 2349 rend = rowrange[recv_rank[imdex]+1]; 2350 rptr = rbuf_j[imdex]; 2351 cols = rbuf_j[imdex] + rend-rstart + 1; 2352 vals = rbuf_a[imdex]; 2353 for (i=0; i<rend-rstart; i++){ 2354 row = i + rstart; 2355 ncols = rptr[i+1] - rptr[i]; 2356 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2357 vals += ncols; 2358 cols += ncols; 2359 } 2360 } 2361 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2362 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2363 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2364 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2365 if (reuse == MAT_INITIAL_MATRIX){ 2366 PetscContainer container; 2367 *matredundant = C; 2368 /* create a supporting struct and attach it to C for reuse */ 2369 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2370 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2371 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2372 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2373 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2374 2375 redund->nzlocal = nzlocal; 2376 redund->nsends = nsends; 2377 redund->nrecvs = nrecvs; 2378 redund->send_rank = send_rank; 2379 redund->sbuf_nz = sbuf_nz; 2380 redund->sbuf_j = sbuf_j; 2381 redund->sbuf_a = sbuf_a; 2382 redund->rbuf_j = rbuf_j; 2383 redund->rbuf_a = rbuf_a; 2384 2385 redund->MatDestroy = C->ops->destroy; 2386 C->ops->destroy = MatDestroy_MatRedundant; 2387 } 2388 PetscFunctionReturn(0); 2389 } 2390 2391 #undef __FUNCT__ 2392 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2393 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2394 { 2395 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2396 PetscErrorCode ierr; 2397 PetscInt i,*idxb = 0; 2398 PetscScalar *va,*vb; 2399 Vec vtmp; 2400 2401 PetscFunctionBegin; 2402 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2403 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2404 if (idx) { 2405 for (i=0; i<A->rmap->n; i++) { 2406 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2407 } 2408 } 2409 2410 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2411 if (idx) { 2412 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2413 } 2414 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2415 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2416 2417 for (i=0; i<A->rmap->n; i++){ 2418 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2419 va[i] = vb[i]; 2420 if (idx) idx[i] = a->garray[idxb[i]]; 2421 } 2422 } 2423 2424 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2425 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2426 if (idxb) { 2427 ierr = PetscFree(idxb);CHKERRQ(ierr); 2428 } 2429 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2430 PetscFunctionReturn(0); 2431 } 2432 2433 #undef __FUNCT__ 2434 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2435 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2436 { 2437 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2438 PetscErrorCode ierr; 2439 PetscInt i,*idxb = 0; 2440 PetscScalar *va,*vb; 2441 Vec vtmp; 2442 2443 PetscFunctionBegin; 2444 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2445 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2446 if (idx) { 2447 for (i=0; i<A->cmap->n; i++) { 2448 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2449 } 2450 } 2451 2452 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2453 if (idx) { 2454 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2455 } 2456 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2457 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2458 2459 for (i=0; i<A->rmap->n; i++){ 2460 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2461 va[i] = vb[i]; 2462 if (idx) idx[i] = a->garray[idxb[i]]; 2463 } 2464 } 2465 2466 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2467 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2468 if (idxb) { 2469 ierr = PetscFree(idxb);CHKERRQ(ierr); 2470 } 2471 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2472 PetscFunctionReturn(0); 2473 } 2474 2475 #undef __FUNCT__ 2476 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2477 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2478 { 2479 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2480 PetscInt n = A->rmap->n; 2481 PetscInt cstart = A->cmap->rstart; 2482 PetscInt *cmap = mat->garray; 2483 PetscInt *diagIdx, *offdiagIdx; 2484 Vec diagV, offdiagV; 2485 PetscScalar *a, *diagA, *offdiagA; 2486 PetscInt r; 2487 PetscErrorCode ierr; 2488 2489 PetscFunctionBegin; 2490 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2491 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2492 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2493 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2494 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2495 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2496 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2497 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2498 for(r = 0; r < n; ++r) { 2499 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2500 a[r] = diagA[r]; 2501 idx[r] = cstart + diagIdx[r]; 2502 } else { 2503 a[r] = offdiagA[r]; 2504 idx[r] = cmap[offdiagIdx[r]]; 2505 } 2506 } 2507 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2508 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2509 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2510 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2511 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2512 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2513 PetscFunctionReturn(0); 2514 } 2515 2516 #undef __FUNCT__ 2517 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2518 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2519 { 2520 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2521 PetscInt n = A->rmap->n; 2522 PetscInt cstart = A->cmap->rstart; 2523 PetscInt *cmap = mat->garray; 2524 PetscInt *diagIdx, *offdiagIdx; 2525 Vec diagV, offdiagV; 2526 PetscScalar *a, *diagA, *offdiagA; 2527 PetscInt r; 2528 PetscErrorCode ierr; 2529 2530 PetscFunctionBegin; 2531 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2532 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2533 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2534 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2535 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2536 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2537 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2538 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2539 for(r = 0; r < n; ++r) { 2540 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2541 a[r] = diagA[r]; 2542 idx[r] = cstart + diagIdx[r]; 2543 } else { 2544 a[r] = offdiagA[r]; 2545 idx[r] = cmap[offdiagIdx[r]]; 2546 } 2547 } 2548 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2549 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2550 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2551 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2552 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2553 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2554 PetscFunctionReturn(0); 2555 } 2556 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2559 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2560 { 2561 PetscErrorCode ierr; 2562 2563 PetscFunctionBegin; 2564 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2565 PetscFunctionReturn(0); 2566 } 2567 2568 /* -------------------------------------------------------------------*/ 2569 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2570 MatGetRow_MPIAIJ, 2571 MatRestoreRow_MPIAIJ, 2572 MatMult_MPIAIJ, 2573 /* 4*/ MatMultAdd_MPIAIJ, 2574 MatMultTranspose_MPIAIJ, 2575 MatMultTransposeAdd_MPIAIJ, 2576 #ifdef PETSC_HAVE_PBGL 2577 MatSolve_MPIAIJ, 2578 #else 2579 0, 2580 #endif 2581 0, 2582 0, 2583 /*10*/ 0, 2584 0, 2585 0, 2586 MatRelax_MPIAIJ, 2587 MatTranspose_MPIAIJ, 2588 /*15*/ MatGetInfo_MPIAIJ, 2589 MatEqual_MPIAIJ, 2590 MatGetDiagonal_MPIAIJ, 2591 MatDiagonalScale_MPIAIJ, 2592 MatNorm_MPIAIJ, 2593 /*20*/ MatAssemblyBegin_MPIAIJ, 2594 MatAssemblyEnd_MPIAIJ, 2595 MatSetOption_MPIAIJ, 2596 MatZeroEntries_MPIAIJ, 2597 /*24*/ MatZeroRows_MPIAIJ, 2598 0, 2599 #ifdef PETSC_HAVE_PBGL 2600 0, 2601 #else 2602 0, 2603 #endif 2604 0, 2605 0, 2606 /*29*/ MatSetUpPreallocation_MPIAIJ, 2607 #ifdef PETSC_HAVE_PBGL 2608 0, 2609 #else 2610 0, 2611 #endif 2612 0, 2613 0, 2614 0, 2615 /*34*/ MatDuplicate_MPIAIJ, 2616 0, 2617 0, 2618 0, 2619 0, 2620 /*39*/ MatAXPY_MPIAIJ, 2621 MatGetSubMatrices_MPIAIJ, 2622 MatIncreaseOverlap_MPIAIJ, 2623 MatGetValues_MPIAIJ, 2624 MatCopy_MPIAIJ, 2625 /*44*/ MatGetRowMax_MPIAIJ, 2626 MatScale_MPIAIJ, 2627 0, 2628 0, 2629 0, 2630 /*49*/ MatSetBlockSize_MPIAIJ, 2631 0, 2632 0, 2633 0, 2634 0, 2635 /*54*/ MatFDColoringCreate_MPIAIJ, 2636 0, 2637 MatSetUnfactored_MPIAIJ, 2638 MatPermute_MPIAIJ, 2639 0, 2640 /*59*/ MatGetSubMatrix_MPIAIJ, 2641 MatDestroy_MPIAIJ, 2642 MatView_MPIAIJ, 2643 0, 2644 0, 2645 /*64*/ 0, 2646 0, 2647 0, 2648 0, 2649 0, 2650 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2651 MatGetRowMinAbs_MPIAIJ, 2652 0, 2653 MatSetColoring_MPIAIJ, 2654 #if defined(PETSC_HAVE_ADIC) 2655 MatSetValuesAdic_MPIAIJ, 2656 #else 2657 0, 2658 #endif 2659 MatSetValuesAdifor_MPIAIJ, 2660 /*75*/ 0, 2661 0, 2662 0, 2663 0, 2664 0, 2665 /*80*/ 0, 2666 0, 2667 0, 2668 /*83*/ MatLoad_MPIAIJ, 2669 0, 2670 0, 2671 0, 2672 0, 2673 0, 2674 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2675 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2676 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2677 MatPtAP_Basic, 2678 MatPtAPSymbolic_MPIAIJ, 2679 /*94*/ MatPtAPNumeric_MPIAIJ, 2680 0, 2681 0, 2682 0, 2683 0, 2684 /*99*/ 0, 2685 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2686 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2687 MatConjugate_MPIAIJ, 2688 0, 2689 /*104*/MatSetValuesRow_MPIAIJ, 2690 MatRealPart_MPIAIJ, 2691 MatImaginaryPart_MPIAIJ, 2692 0, 2693 0, 2694 /*109*/0, 2695 MatGetRedundantMatrix_MPIAIJ, 2696 MatGetRowMin_MPIAIJ, 2697 0, 2698 0, 2699 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2700 0, 2701 0, 2702 0, 2703 0, 2704 0 2705 }; 2706 2707 /* ----------------------------------------------------------------------------------------*/ 2708 2709 EXTERN_C_BEGIN 2710 #undef __FUNCT__ 2711 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2712 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2713 { 2714 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2715 PetscErrorCode ierr; 2716 2717 PetscFunctionBegin; 2718 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2719 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2720 PetscFunctionReturn(0); 2721 } 2722 EXTERN_C_END 2723 2724 EXTERN_C_BEGIN 2725 #undef __FUNCT__ 2726 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2727 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2728 { 2729 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2730 PetscErrorCode ierr; 2731 2732 PetscFunctionBegin; 2733 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2734 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2735 PetscFunctionReturn(0); 2736 } 2737 EXTERN_C_END 2738 2739 #include "petscpc.h" 2740 EXTERN_C_BEGIN 2741 #undef __FUNCT__ 2742 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2743 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2744 { 2745 Mat_MPIAIJ *b; 2746 PetscErrorCode ierr; 2747 PetscInt i; 2748 2749 PetscFunctionBegin; 2750 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2751 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2752 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2753 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2754 2755 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2756 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2757 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2758 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2759 if (d_nnz) { 2760 for (i=0; i<B->rmap->n; i++) { 2761 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2762 } 2763 } 2764 if (o_nnz) { 2765 for (i=0; i<B->rmap->n; i++) { 2766 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2767 } 2768 } 2769 b = (Mat_MPIAIJ*)B->data; 2770 2771 if (!B->preallocated) { 2772 /* Explicitly create 2 MATSEQAIJ matrices. */ 2773 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2774 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2775 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2776 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2777 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2778 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2779 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2780 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2781 } 2782 2783 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2784 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2785 B->preallocated = PETSC_TRUE; 2786 PetscFunctionReturn(0); 2787 } 2788 EXTERN_C_END 2789 2790 #undef __FUNCT__ 2791 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2792 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2793 { 2794 Mat mat; 2795 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2796 PetscErrorCode ierr; 2797 2798 PetscFunctionBegin; 2799 *newmat = 0; 2800 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2801 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2802 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2803 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2804 a = (Mat_MPIAIJ*)mat->data; 2805 2806 mat->factor = matin->factor; 2807 mat->rmap->bs = matin->rmap->bs; 2808 mat->assembled = PETSC_TRUE; 2809 mat->insertmode = NOT_SET_VALUES; 2810 mat->preallocated = PETSC_TRUE; 2811 2812 a->size = oldmat->size; 2813 a->rank = oldmat->rank; 2814 a->donotstash = oldmat->donotstash; 2815 a->roworiented = oldmat->roworiented; 2816 a->rowindices = 0; 2817 a->rowvalues = 0; 2818 a->getrowactive = PETSC_FALSE; 2819 2820 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2821 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2822 2823 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2824 if (oldmat->colmap) { 2825 #if defined (PETSC_USE_CTABLE) 2826 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2827 #else 2828 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2829 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2830 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2831 #endif 2832 } else a->colmap = 0; 2833 if (oldmat->garray) { 2834 PetscInt len; 2835 len = oldmat->B->cmap->n; 2836 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2837 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2838 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2839 } else a->garray = 0; 2840 2841 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2842 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2843 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2844 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2845 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2846 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2847 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2848 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2849 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2850 *newmat = mat; 2851 PetscFunctionReturn(0); 2852 } 2853 2854 #include "petscsys.h" 2855 2856 #undef __FUNCT__ 2857 #define __FUNCT__ "MatLoad_MPIAIJ" 2858 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2859 { 2860 Mat A; 2861 PetscScalar *vals,*svals; 2862 MPI_Comm comm = ((PetscObject)viewer)->comm; 2863 MPI_Status status; 2864 PetscErrorCode ierr; 2865 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2866 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2867 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2868 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2869 PetscInt cend,cstart,n,*rowners; 2870 int fd; 2871 2872 PetscFunctionBegin; 2873 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2874 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2875 if (!rank) { 2876 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2877 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2878 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2879 } 2880 2881 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2882 M = header[1]; N = header[2]; 2883 /* determine ownership of all rows */ 2884 m = M/size + ((M % size) > rank); 2885 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2886 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2887 2888 /* First process needs enough room for process with most rows */ 2889 if (!rank) { 2890 mmax = rowners[1]; 2891 for (i=2; i<size; i++) { 2892 mmax = PetscMax(mmax,rowners[i]); 2893 } 2894 } else mmax = m; 2895 2896 rowners[0] = 0; 2897 for (i=2; i<=size; i++) { 2898 rowners[i] += rowners[i-1]; 2899 } 2900 rstart = rowners[rank]; 2901 rend = rowners[rank+1]; 2902 2903 /* distribute row lengths to all processors */ 2904 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2905 if (!rank) { 2906 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2907 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2908 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2909 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2910 for (j=0; j<m; j++) { 2911 procsnz[0] += ourlens[j]; 2912 } 2913 for (i=1; i<size; i++) { 2914 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2915 /* calculate the number of nonzeros on each processor */ 2916 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2917 procsnz[i] += rowlengths[j]; 2918 } 2919 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2920 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2921 } 2922 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2923 } else { 2924 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2925 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2926 } 2927 2928 if (!rank) { 2929 /* determine max buffer needed and allocate it */ 2930 maxnz = 0; 2931 for (i=0; i<size; i++) { 2932 maxnz = PetscMax(maxnz,procsnz[i]); 2933 } 2934 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2935 2936 /* read in my part of the matrix column indices */ 2937 nz = procsnz[0]; 2938 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2939 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2940 2941 /* read in every one elses and ship off */ 2942 for (i=1; i<size; i++) { 2943 nz = procsnz[i]; 2944 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2945 mpicnt = PetscMPIIntCast(nz); 2946 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2947 } 2948 ierr = PetscFree(cols);CHKERRQ(ierr); 2949 } else { 2950 /* determine buffer space needed for message */ 2951 nz = 0; 2952 for (i=0; i<m; i++) { 2953 nz += ourlens[i]; 2954 } 2955 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2956 2957 /* receive message of column indices*/ 2958 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2959 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2960 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2961 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2962 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2963 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2964 } 2965 2966 /* determine column ownership if matrix is not square */ 2967 if (N != M) { 2968 n = N/size + ((N % size) > rank); 2969 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2970 cstart = cend - n; 2971 } else { 2972 cstart = rstart; 2973 cend = rend; 2974 n = cend - cstart; 2975 } 2976 2977 /* loop over local rows, determining number of off diagonal entries */ 2978 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2979 jj = 0; 2980 for (i=0; i<m; i++) { 2981 for (j=0; j<ourlens[i]; j++) { 2982 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2983 jj++; 2984 } 2985 } 2986 2987 /* create our matrix */ 2988 for (i=0; i<m; i++) { 2989 ourlens[i] -= offlens[i]; 2990 } 2991 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2992 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2993 ierr = MatSetType(A,type);CHKERRQ(ierr); 2994 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2995 2996 for (i=0; i<m; i++) { 2997 ourlens[i] += offlens[i]; 2998 } 2999 3000 if (!rank) { 3001 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3002 3003 /* read in my part of the matrix numerical values */ 3004 nz = procsnz[0]; 3005 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3006 3007 /* insert into matrix */ 3008 jj = rstart; 3009 smycols = mycols; 3010 svals = vals; 3011 for (i=0; i<m; i++) { 3012 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3013 smycols += ourlens[i]; 3014 svals += ourlens[i]; 3015 jj++; 3016 } 3017 3018 /* read in other processors and ship out */ 3019 for (i=1; i<size; i++) { 3020 nz = procsnz[i]; 3021 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3022 mpicnt = PetscMPIIntCast(nz); 3023 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3024 } 3025 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3026 } else { 3027 /* receive numeric values */ 3028 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3029 3030 /* receive message of values*/ 3031 mpicnt = PetscMPIIntCast(nz); 3032 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3033 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3034 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 3035 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 3036 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3037 3038 /* insert into matrix */ 3039 jj = rstart; 3040 smycols = mycols; 3041 svals = vals; 3042 for (i=0; i<m; i++) { 3043 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3044 smycols += ourlens[i]; 3045 svals += ourlens[i]; 3046 jj++; 3047 } 3048 } 3049 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3050 ierr = PetscFree(vals);CHKERRQ(ierr); 3051 ierr = PetscFree(mycols);CHKERRQ(ierr); 3052 ierr = PetscFree(rowners);CHKERRQ(ierr); 3053 3054 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3055 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3056 *newmat = A; 3057 PetscFunctionReturn(0); 3058 } 3059 3060 #undef __FUNCT__ 3061 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3062 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3063 { 3064 PetscErrorCode ierr; 3065 IS iscol_local; 3066 PetscInt csize; 3067 3068 PetscFunctionBegin; 3069 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3070 if (call == MAT_REUSE_MATRIX) { 3071 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3072 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3073 } else { 3074 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3075 } 3076 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3077 if (call == MAT_INITIAL_MATRIX) { 3078 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3079 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3080 } 3081 PetscFunctionReturn(0); 3082 } 3083 3084 #undef __FUNCT__ 3085 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3086 /* 3087 Not great since it makes two copies of the submatrix, first an SeqAIJ 3088 in local and then by concatenating the local matrices the end result. 3089 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3090 3091 Note: This requires a sequential iscol with all indices. 3092 */ 3093 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3094 { 3095 PetscErrorCode ierr; 3096 PetscMPIInt rank,size; 3097 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3098 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3099 Mat *local,M,Mreuse; 3100 MatScalar *vwork,*aa; 3101 MPI_Comm comm = ((PetscObject)mat)->comm; 3102 Mat_SeqAIJ *aij; 3103 3104 3105 PetscFunctionBegin; 3106 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3107 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3108 3109 if (call == MAT_REUSE_MATRIX) { 3110 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3111 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3112 local = &Mreuse; 3113 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3114 } else { 3115 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3116 Mreuse = *local; 3117 ierr = PetscFree(local);CHKERRQ(ierr); 3118 } 3119 3120 /* 3121 m - number of local rows 3122 n - number of columns (same on all processors) 3123 rstart - first row in new global matrix generated 3124 */ 3125 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3126 if (call == MAT_INITIAL_MATRIX) { 3127 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3128 ii = aij->i; 3129 jj = aij->j; 3130 3131 /* 3132 Determine the number of non-zeros in the diagonal and off-diagonal 3133 portions of the matrix in order to do correct preallocation 3134 */ 3135 3136 /* first get start and end of "diagonal" columns */ 3137 if (csize == PETSC_DECIDE) { 3138 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3139 if (mglobal == n) { /* square matrix */ 3140 nlocal = m; 3141 } else { 3142 nlocal = n/size + ((n % size) > rank); 3143 } 3144 } else { 3145 nlocal = csize; 3146 } 3147 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3148 rstart = rend - nlocal; 3149 if (rank == size - 1 && rend != n) { 3150 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3151 } 3152 3153 /* next, compute all the lengths */ 3154 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3155 olens = dlens + m; 3156 for (i=0; i<m; i++) { 3157 jend = ii[i+1] - ii[i]; 3158 olen = 0; 3159 dlen = 0; 3160 for (j=0; j<jend; j++) { 3161 if (*jj < rstart || *jj >= rend) olen++; 3162 else dlen++; 3163 jj++; 3164 } 3165 olens[i] = olen; 3166 dlens[i] = dlen; 3167 } 3168 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3169 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3170 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3171 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3172 ierr = PetscFree(dlens);CHKERRQ(ierr); 3173 } else { 3174 PetscInt ml,nl; 3175 3176 M = *newmat; 3177 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3178 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3179 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3180 /* 3181 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3182 rather than the slower MatSetValues(). 3183 */ 3184 M->was_assembled = PETSC_TRUE; 3185 M->assembled = PETSC_FALSE; 3186 } 3187 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3188 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3189 ii = aij->i; 3190 jj = aij->j; 3191 aa = aij->a; 3192 for (i=0; i<m; i++) { 3193 row = rstart + i; 3194 nz = ii[i+1] - ii[i]; 3195 cwork = jj; jj += nz; 3196 vwork = aa; aa += nz; 3197 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3198 } 3199 3200 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3201 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3202 *newmat = M; 3203 3204 /* save submatrix used in processor for next request */ 3205 if (call == MAT_INITIAL_MATRIX) { 3206 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3207 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3208 } 3209 3210 PetscFunctionReturn(0); 3211 } 3212 3213 EXTERN_C_BEGIN 3214 #undef __FUNCT__ 3215 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3216 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3217 { 3218 PetscInt m,cstart, cend,j,nnz,i,d; 3219 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3220 const PetscInt *JJ; 3221 PetscScalar *values; 3222 PetscErrorCode ierr; 3223 3224 PetscFunctionBegin; 3225 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3226 3227 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3228 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3229 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 3230 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 3231 m = B->rmap->n; 3232 cstart = B->cmap->rstart; 3233 cend = B->cmap->rend; 3234 rstart = B->rmap->rstart; 3235 3236 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3237 o_nnz = d_nnz + m; 3238 3239 #if defined(PETSC_USE_DEBUGGING) 3240 for (i=0; i<m; i++) { 3241 nnz = Ii[i+1]- Ii[i]; 3242 JJ = J + Ii[i]; 3243 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3244 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3245 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3246 for (j=1; j<nnz; j++) { 3247 if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3248 } 3249 } 3250 #endif 3251 3252 for (i=0; i<m; i++) { 3253 nnz = Ii[i+1]- Ii[i]; 3254 JJ = J + Ii[i]; 3255 nnz_max = PetscMax(nnz_max,nnz); 3256 for (j=0; j<nnz; j++) { 3257 if (*JJ >= cstart) break; 3258 JJ++; 3259 } 3260 d = 0; 3261 for (; j<nnz; j++) { 3262 if (*JJ++ >= cend) break; 3263 d++; 3264 } 3265 d_nnz[i] = d; 3266 o_nnz[i] = nnz - d; 3267 } 3268 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3269 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3270 3271 if (v) values = (PetscScalar*)v; 3272 else { 3273 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3274 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3275 } 3276 3277 for (i=0; i<m; i++) { 3278 ii = i + rstart; 3279 nnz = Ii[i+1]- Ii[i]; 3280 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3281 } 3282 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3283 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3284 3285 if (!v) { 3286 ierr = PetscFree(values);CHKERRQ(ierr); 3287 } 3288 PetscFunctionReturn(0); 3289 } 3290 EXTERN_C_END 3291 3292 #undef __FUNCT__ 3293 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3294 /*@ 3295 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3296 (the default parallel PETSc format). 3297 3298 Collective on MPI_Comm 3299 3300 Input Parameters: 3301 + B - the matrix 3302 . i - the indices into j for the start of each local row (starts with zero) 3303 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3304 - v - optional values in the matrix 3305 3306 Level: developer 3307 3308 Notes: 3309 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3310 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3311 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3312 3313 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3314 3315 The format which is used for the sparse matrix input, is equivalent to a 3316 row-major ordering.. i.e for the following matrix, the input data expected is 3317 as shown: 3318 3319 1 0 0 3320 2 0 3 P0 3321 ------- 3322 4 5 6 P1 3323 3324 Process0 [P0]: rows_owned=[0,1] 3325 i = {0,1,3} [size = nrow+1 = 2+1] 3326 j = {0,0,2} [size = nz = 6] 3327 v = {1,2,3} [size = nz = 6] 3328 3329 Process1 [P1]: rows_owned=[2] 3330 i = {0,3} [size = nrow+1 = 1+1] 3331 j = {0,1,2} [size = nz = 6] 3332 v = {4,5,6} [size = nz = 6] 3333 3334 The column indices for each row MUST be sorted. 3335 3336 .keywords: matrix, aij, compressed row, sparse, parallel 3337 3338 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3339 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3340 @*/ 3341 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3342 { 3343 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3344 3345 PetscFunctionBegin; 3346 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3347 if (f) { 3348 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3349 } 3350 PetscFunctionReturn(0); 3351 } 3352 3353 #undef __FUNCT__ 3354 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3355 /*@C 3356 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3357 (the default parallel PETSc format). For good matrix assembly performance 3358 the user should preallocate the matrix storage by setting the parameters 3359 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3360 performance can be increased by more than a factor of 50. 3361 3362 Collective on MPI_Comm 3363 3364 Input Parameters: 3365 + A - the matrix 3366 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3367 (same value is used for all local rows) 3368 . d_nnz - array containing the number of nonzeros in the various rows of the 3369 DIAGONAL portion of the local submatrix (possibly different for each row) 3370 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3371 The size of this array is equal to the number of local rows, i.e 'm'. 3372 You must leave room for the diagonal entry even if it is zero. 3373 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3374 submatrix (same value is used for all local rows). 3375 - o_nnz - array containing the number of nonzeros in the various rows of the 3376 OFF-DIAGONAL portion of the local submatrix (possibly different for 3377 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3378 structure. The size of this array is equal to the number 3379 of local rows, i.e 'm'. 3380 3381 If the *_nnz parameter is given then the *_nz parameter is ignored 3382 3383 The AIJ format (also called the Yale sparse matrix format or 3384 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3385 storage. The stored row and column indices begin with zero. See the users manual for details. 3386 3387 The parallel matrix is partitioned such that the first m0 rows belong to 3388 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3389 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3390 3391 The DIAGONAL portion of the local submatrix of a processor can be defined 3392 as the submatrix which is obtained by extraction the part corresponding 3393 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3394 first row that belongs to the processor, and r2 is the last row belonging 3395 to the this processor. This is a square mxm matrix. The remaining portion 3396 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3397 3398 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3399 3400 You can call MatGetInfo() to get information on how effective the preallocation was; 3401 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3402 You can also run with the option -info and look for messages with the string 3403 malloc in them to see if additional memory allocation was needed. 3404 3405 Example usage: 3406 3407 Consider the following 8x8 matrix with 34 non-zero values, that is 3408 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3409 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3410 as follows: 3411 3412 .vb 3413 1 2 0 | 0 3 0 | 0 4 3414 Proc0 0 5 6 | 7 0 0 | 8 0 3415 9 0 10 | 11 0 0 | 12 0 3416 ------------------------------------- 3417 13 0 14 | 15 16 17 | 0 0 3418 Proc1 0 18 0 | 19 20 21 | 0 0 3419 0 0 0 | 22 23 0 | 24 0 3420 ------------------------------------- 3421 Proc2 25 26 27 | 0 0 28 | 29 0 3422 30 0 0 | 31 32 33 | 0 34 3423 .ve 3424 3425 This can be represented as a collection of submatrices as: 3426 3427 .vb 3428 A B C 3429 D E F 3430 G H I 3431 .ve 3432 3433 Where the submatrices A,B,C are owned by proc0, D,E,F are 3434 owned by proc1, G,H,I are owned by proc2. 3435 3436 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3437 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3438 The 'M','N' parameters are 8,8, and have the same values on all procs. 3439 3440 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3441 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3442 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3443 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3444 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3445 matrix, ans [DF] as another SeqAIJ matrix. 3446 3447 When d_nz, o_nz parameters are specified, d_nz storage elements are 3448 allocated for every row of the local diagonal submatrix, and o_nz 3449 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3450 One way to choose d_nz and o_nz is to use the max nonzerors per local 3451 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3452 In this case, the values of d_nz,o_nz are: 3453 .vb 3454 proc0 : dnz = 2, o_nz = 2 3455 proc1 : dnz = 3, o_nz = 2 3456 proc2 : dnz = 1, o_nz = 4 3457 .ve 3458 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3459 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3460 for proc3. i.e we are using 12+15+10=37 storage locations to store 3461 34 values. 3462 3463 When d_nnz, o_nnz parameters are specified, the storage is specified 3464 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3465 In the above case the values for d_nnz,o_nnz are: 3466 .vb 3467 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3468 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3469 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3470 .ve 3471 Here the space allocated is sum of all the above values i.e 34, and 3472 hence pre-allocation is perfect. 3473 3474 Level: intermediate 3475 3476 .keywords: matrix, aij, compressed row, sparse, parallel 3477 3478 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3479 MPIAIJ, MatGetInfo() 3480 @*/ 3481 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3482 { 3483 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3484 3485 PetscFunctionBegin; 3486 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3487 if (f) { 3488 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3489 } 3490 PetscFunctionReturn(0); 3491 } 3492 3493 #undef __FUNCT__ 3494 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3495 /*@ 3496 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3497 CSR format the local rows. 3498 3499 Collective on MPI_Comm 3500 3501 Input Parameters: 3502 + comm - MPI communicator 3503 . m - number of local rows (Cannot be PETSC_DECIDE) 3504 . n - This value should be the same as the local size used in creating the 3505 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3506 calculated if N is given) For square matrices n is almost always m. 3507 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3508 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3509 . i - row indices 3510 . j - column indices 3511 - a - matrix values 3512 3513 Output Parameter: 3514 . mat - the matrix 3515 3516 Level: intermediate 3517 3518 Notes: 3519 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3520 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3521 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3522 3523 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3524 3525 The format which is used for the sparse matrix input, is equivalent to a 3526 row-major ordering.. i.e for the following matrix, the input data expected is 3527 as shown: 3528 3529 1 0 0 3530 2 0 3 P0 3531 ------- 3532 4 5 6 P1 3533 3534 Process0 [P0]: rows_owned=[0,1] 3535 i = {0,1,3} [size = nrow+1 = 2+1] 3536 j = {0,0,2} [size = nz = 6] 3537 v = {1,2,3} [size = nz = 6] 3538 3539 Process1 [P1]: rows_owned=[2] 3540 i = {0,3} [size = nrow+1 = 1+1] 3541 j = {0,1,2} [size = nz = 6] 3542 v = {4,5,6} [size = nz = 6] 3543 3544 .keywords: matrix, aij, compressed row, sparse, parallel 3545 3546 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3547 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3548 @*/ 3549 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3550 { 3551 PetscErrorCode ierr; 3552 3553 PetscFunctionBegin; 3554 if (i[0]) { 3555 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3556 } 3557 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3558 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3559 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3560 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3561 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3562 PetscFunctionReturn(0); 3563 } 3564 3565 #undef __FUNCT__ 3566 #define __FUNCT__ "MatCreateMPIAIJ" 3567 /*@C 3568 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3569 (the default parallel PETSc format). For good matrix assembly performance 3570 the user should preallocate the matrix storage by setting the parameters 3571 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3572 performance can be increased by more than a factor of 50. 3573 3574 Collective on MPI_Comm 3575 3576 Input Parameters: 3577 + comm - MPI communicator 3578 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3579 This value should be the same as the local size used in creating the 3580 y vector for the matrix-vector product y = Ax. 3581 . n - This value should be the same as the local size used in creating the 3582 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3583 calculated if N is given) For square matrices n is almost always m. 3584 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3585 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3586 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3587 (same value is used for all local rows) 3588 . d_nnz - array containing the number of nonzeros in the various rows of the 3589 DIAGONAL portion of the local submatrix (possibly different for each row) 3590 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3591 The size of this array is equal to the number of local rows, i.e 'm'. 3592 You must leave room for the diagonal entry even if it is zero. 3593 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3594 submatrix (same value is used for all local rows). 3595 - o_nnz - array containing the number of nonzeros in the various rows of the 3596 OFF-DIAGONAL portion of the local submatrix (possibly different for 3597 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3598 structure. The size of this array is equal to the number 3599 of local rows, i.e 'm'. 3600 3601 Output Parameter: 3602 . A - the matrix 3603 3604 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3605 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3606 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3607 3608 Notes: 3609 If the *_nnz parameter is given then the *_nz parameter is ignored 3610 3611 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3612 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3613 storage requirements for this matrix. 3614 3615 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3616 processor than it must be used on all processors that share the object for 3617 that argument. 3618 3619 The user MUST specify either the local or global matrix dimensions 3620 (possibly both). 3621 3622 The parallel matrix is partitioned across processors such that the 3623 first m0 rows belong to process 0, the next m1 rows belong to 3624 process 1, the next m2 rows belong to process 2 etc.. where 3625 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3626 values corresponding to [m x N] submatrix. 3627 3628 The columns are logically partitioned with the n0 columns belonging 3629 to 0th partition, the next n1 columns belonging to the next 3630 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3631 3632 The DIAGONAL portion of the local submatrix on any given processor 3633 is the submatrix corresponding to the rows and columns m,n 3634 corresponding to the given processor. i.e diagonal matrix on 3635 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3636 etc. The remaining portion of the local submatrix [m x (N-n)] 3637 constitute the OFF-DIAGONAL portion. The example below better 3638 illustrates this concept. 3639 3640 For a square global matrix we define each processor's diagonal portion 3641 to be its local rows and the corresponding columns (a square submatrix); 3642 each processor's off-diagonal portion encompasses the remainder of the 3643 local matrix (a rectangular submatrix). 3644 3645 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3646 3647 When calling this routine with a single process communicator, a matrix of 3648 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3649 type of communicator, use the construction mechanism: 3650 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3651 3652 By default, this format uses inodes (identical nodes) when possible. 3653 We search for consecutive rows with the same nonzero structure, thereby 3654 reusing matrix information to achieve increased efficiency. 3655 3656 Options Database Keys: 3657 + -mat_no_inode - Do not use inodes 3658 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3659 - -mat_aij_oneindex - Internally use indexing starting at 1 3660 rather than 0. Note that when calling MatSetValues(), 3661 the user still MUST index entries starting at 0! 3662 3663 3664 Example usage: 3665 3666 Consider the following 8x8 matrix with 34 non-zero values, that is 3667 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3668 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3669 as follows: 3670 3671 .vb 3672 1 2 0 | 0 3 0 | 0 4 3673 Proc0 0 5 6 | 7 0 0 | 8 0 3674 9 0 10 | 11 0 0 | 12 0 3675 ------------------------------------- 3676 13 0 14 | 15 16 17 | 0 0 3677 Proc1 0 18 0 | 19 20 21 | 0 0 3678 0 0 0 | 22 23 0 | 24 0 3679 ------------------------------------- 3680 Proc2 25 26 27 | 0 0 28 | 29 0 3681 30 0 0 | 31 32 33 | 0 34 3682 .ve 3683 3684 This can be represented as a collection of submatrices as: 3685 3686 .vb 3687 A B C 3688 D E F 3689 G H I 3690 .ve 3691 3692 Where the submatrices A,B,C are owned by proc0, D,E,F are 3693 owned by proc1, G,H,I are owned by proc2. 3694 3695 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3696 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3697 The 'M','N' parameters are 8,8, and have the same values on all procs. 3698 3699 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3700 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3701 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3702 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3703 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3704 matrix, ans [DF] as another SeqAIJ matrix. 3705 3706 When d_nz, o_nz parameters are specified, d_nz storage elements are 3707 allocated for every row of the local diagonal submatrix, and o_nz 3708 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3709 One way to choose d_nz and o_nz is to use the max nonzerors per local 3710 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3711 In this case, the values of d_nz,o_nz are: 3712 .vb 3713 proc0 : dnz = 2, o_nz = 2 3714 proc1 : dnz = 3, o_nz = 2 3715 proc2 : dnz = 1, o_nz = 4 3716 .ve 3717 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3718 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3719 for proc3. i.e we are using 12+15+10=37 storage locations to store 3720 34 values. 3721 3722 When d_nnz, o_nnz parameters are specified, the storage is specified 3723 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3724 In the above case the values for d_nnz,o_nnz are: 3725 .vb 3726 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3727 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3728 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3729 .ve 3730 Here the space allocated is sum of all the above values i.e 34, and 3731 hence pre-allocation is perfect. 3732 3733 Level: intermediate 3734 3735 .keywords: matrix, aij, compressed row, sparse, parallel 3736 3737 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3738 MPIAIJ, MatCreateMPIAIJWithArrays() 3739 @*/ 3740 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3741 { 3742 PetscErrorCode ierr; 3743 PetscMPIInt size; 3744 3745 PetscFunctionBegin; 3746 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3747 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3748 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3749 if (size > 1) { 3750 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3751 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3752 } else { 3753 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3754 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3755 } 3756 PetscFunctionReturn(0); 3757 } 3758 3759 #undef __FUNCT__ 3760 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3761 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3762 { 3763 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3764 3765 PetscFunctionBegin; 3766 *Ad = a->A; 3767 *Ao = a->B; 3768 *colmap = a->garray; 3769 PetscFunctionReturn(0); 3770 } 3771 3772 #undef __FUNCT__ 3773 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3774 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3775 { 3776 PetscErrorCode ierr; 3777 PetscInt i; 3778 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3779 3780 PetscFunctionBegin; 3781 if (coloring->ctype == IS_COLORING_GLOBAL) { 3782 ISColoringValue *allcolors,*colors; 3783 ISColoring ocoloring; 3784 3785 /* set coloring for diagonal portion */ 3786 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3787 3788 /* set coloring for off-diagonal portion */ 3789 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3790 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3791 for (i=0; i<a->B->cmap->n; i++) { 3792 colors[i] = allcolors[a->garray[i]]; 3793 } 3794 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3795 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3796 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3797 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3798 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3799 ISColoringValue *colors; 3800 PetscInt *larray; 3801 ISColoring ocoloring; 3802 3803 /* set coloring for diagonal portion */ 3804 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3805 for (i=0; i<a->A->cmap->n; i++) { 3806 larray[i] = i + A->cmap->rstart; 3807 } 3808 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3809 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3810 for (i=0; i<a->A->cmap->n; i++) { 3811 colors[i] = coloring->colors[larray[i]]; 3812 } 3813 ierr = PetscFree(larray);CHKERRQ(ierr); 3814 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3815 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3816 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3817 3818 /* set coloring for off-diagonal portion */ 3819 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3820 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3821 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3822 for (i=0; i<a->B->cmap->n; i++) { 3823 colors[i] = coloring->colors[larray[i]]; 3824 } 3825 ierr = PetscFree(larray);CHKERRQ(ierr); 3826 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3827 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3828 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3829 } else { 3830 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3831 } 3832 3833 PetscFunctionReturn(0); 3834 } 3835 3836 #if defined(PETSC_HAVE_ADIC) 3837 #undef __FUNCT__ 3838 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3839 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3840 { 3841 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3842 PetscErrorCode ierr; 3843 3844 PetscFunctionBegin; 3845 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3846 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3847 PetscFunctionReturn(0); 3848 } 3849 #endif 3850 3851 #undef __FUNCT__ 3852 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3853 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3854 { 3855 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3856 PetscErrorCode ierr; 3857 3858 PetscFunctionBegin; 3859 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3860 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3861 PetscFunctionReturn(0); 3862 } 3863 3864 #undef __FUNCT__ 3865 #define __FUNCT__ "MatMerge" 3866 /*@ 3867 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3868 matrices from each processor 3869 3870 Collective on MPI_Comm 3871 3872 Input Parameters: 3873 + comm - the communicators the parallel matrix will live on 3874 . inmat - the input sequential matrices 3875 . n - number of local columns (or PETSC_DECIDE) 3876 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3877 3878 Output Parameter: 3879 . outmat - the parallel matrix generated 3880 3881 Level: advanced 3882 3883 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3884 3885 @*/ 3886 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3887 { 3888 PetscErrorCode ierr; 3889 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3890 PetscInt *indx; 3891 PetscScalar *values; 3892 3893 PetscFunctionBegin; 3894 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3895 if (scall == MAT_INITIAL_MATRIX){ 3896 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3897 if (n == PETSC_DECIDE){ 3898 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3899 } 3900 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3901 rstart -= m; 3902 3903 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3904 for (i=0;i<m;i++) { 3905 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3906 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3907 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3908 } 3909 /* This routine will ONLY return MPIAIJ type matrix */ 3910 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3911 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3912 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3913 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3914 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3915 3916 } else if (scall == MAT_REUSE_MATRIX){ 3917 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3918 } else { 3919 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3920 } 3921 3922 for (i=0;i<m;i++) { 3923 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3924 Ii = i + rstart; 3925 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3926 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3927 } 3928 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3929 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3930 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3931 3932 PetscFunctionReturn(0); 3933 } 3934 3935 #undef __FUNCT__ 3936 #define __FUNCT__ "MatFileSplit" 3937 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3938 { 3939 PetscErrorCode ierr; 3940 PetscMPIInt rank; 3941 PetscInt m,N,i,rstart,nnz; 3942 size_t len; 3943 const PetscInt *indx; 3944 PetscViewer out; 3945 char *name; 3946 Mat B; 3947 const PetscScalar *values; 3948 3949 PetscFunctionBegin; 3950 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3951 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3952 /* Should this be the type of the diagonal block of A? */ 3953 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3954 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3955 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3956 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3957 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3958 for (i=0;i<m;i++) { 3959 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3960 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3961 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3962 } 3963 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3964 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3965 3966 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3967 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3968 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3969 sprintf(name,"%s.%d",outfile,rank); 3970 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3971 ierr = PetscFree(name); 3972 ierr = MatView(B,out);CHKERRQ(ierr); 3973 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3974 ierr = MatDestroy(B);CHKERRQ(ierr); 3975 PetscFunctionReturn(0); 3976 } 3977 3978 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3979 #undef __FUNCT__ 3980 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3981 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3982 { 3983 PetscErrorCode ierr; 3984 Mat_Merge_SeqsToMPI *merge; 3985 PetscContainer container; 3986 3987 PetscFunctionBegin; 3988 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3989 if (container) { 3990 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3991 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3992 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3993 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3994 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3995 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3996 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3997 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3998 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3999 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4000 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4001 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 4002 4003 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4004 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4005 } 4006 ierr = PetscFree(merge);CHKERRQ(ierr); 4007 4008 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4009 PetscFunctionReturn(0); 4010 } 4011 4012 #include "../src/mat/utils/freespace.h" 4013 #include "petscbt.h" 4014 4015 #undef __FUNCT__ 4016 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4017 /*@C 4018 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4019 matrices from each processor 4020 4021 Collective on MPI_Comm 4022 4023 Input Parameters: 4024 + comm - the communicators the parallel matrix will live on 4025 . seqmat - the input sequential matrices 4026 . m - number of local rows (or PETSC_DECIDE) 4027 . n - number of local columns (or PETSC_DECIDE) 4028 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4029 4030 Output Parameter: 4031 . mpimat - the parallel matrix generated 4032 4033 Level: advanced 4034 4035 Notes: 4036 The dimensions of the sequential matrix in each processor MUST be the same. 4037 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4038 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4039 @*/ 4040 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4041 { 4042 PetscErrorCode ierr; 4043 MPI_Comm comm=((PetscObject)mpimat)->comm; 4044 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4045 PetscMPIInt size,rank,taga,*len_s; 4046 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4047 PetscInt proc,m; 4048 PetscInt **buf_ri,**buf_rj; 4049 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4050 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4051 MPI_Request *s_waits,*r_waits; 4052 MPI_Status *status; 4053 MatScalar *aa=a->a; 4054 MatScalar **abuf_r,*ba_i; 4055 Mat_Merge_SeqsToMPI *merge; 4056 PetscContainer container; 4057 4058 PetscFunctionBegin; 4059 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4060 4061 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4062 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4063 4064 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4065 if (container) { 4066 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4067 } 4068 bi = merge->bi; 4069 bj = merge->bj; 4070 buf_ri = merge->buf_ri; 4071 buf_rj = merge->buf_rj; 4072 4073 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4074 owners = merge->rowmap.range; 4075 len_s = merge->len_s; 4076 4077 /* send and recv matrix values */ 4078 /*-----------------------------*/ 4079 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4080 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4081 4082 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4083 for (proc=0,k=0; proc<size; proc++){ 4084 if (!len_s[proc]) continue; 4085 i = owners[proc]; 4086 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4087 k++; 4088 } 4089 4090 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4091 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4092 ierr = PetscFree(status);CHKERRQ(ierr); 4093 4094 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4095 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4096 4097 /* insert mat values of mpimat */ 4098 /*----------------------------*/ 4099 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4100 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4101 nextrow = buf_ri_k + merge->nrecv; 4102 nextai = nextrow + merge->nrecv; 4103 4104 for (k=0; k<merge->nrecv; k++){ 4105 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4106 nrows = *(buf_ri_k[k]); 4107 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4108 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4109 } 4110 4111 /* set values of ba */ 4112 m = merge->rowmap.n; 4113 for (i=0; i<m; i++) { 4114 arow = owners[rank] + i; 4115 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4116 bnzi = bi[i+1] - bi[i]; 4117 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4118 4119 /* add local non-zero vals of this proc's seqmat into ba */ 4120 anzi = ai[arow+1] - ai[arow]; 4121 aj = a->j + ai[arow]; 4122 aa = a->a + ai[arow]; 4123 nextaj = 0; 4124 for (j=0; nextaj<anzi; j++){ 4125 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4126 ba_i[j] += aa[nextaj++]; 4127 } 4128 } 4129 4130 /* add received vals into ba */ 4131 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4132 /* i-th row */ 4133 if (i == *nextrow[k]) { 4134 anzi = *(nextai[k]+1) - *nextai[k]; 4135 aj = buf_rj[k] + *(nextai[k]); 4136 aa = abuf_r[k] + *(nextai[k]); 4137 nextaj = 0; 4138 for (j=0; nextaj<anzi; j++){ 4139 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4140 ba_i[j] += aa[nextaj++]; 4141 } 4142 } 4143 nextrow[k]++; nextai[k]++; 4144 } 4145 } 4146 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4147 } 4148 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4149 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4150 4151 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4152 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4153 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4154 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4155 PetscFunctionReturn(0); 4156 } 4157 4158 #undef __FUNCT__ 4159 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4160 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4161 { 4162 PetscErrorCode ierr; 4163 Mat B_mpi; 4164 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4165 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4166 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4167 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4168 PetscInt len,proc,*dnz,*onz; 4169 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4170 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4171 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4172 MPI_Status *status; 4173 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4174 PetscBT lnkbt; 4175 Mat_Merge_SeqsToMPI *merge; 4176 PetscContainer container; 4177 4178 PetscFunctionBegin; 4179 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4180 4181 /* make sure it is a PETSc comm */ 4182 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4183 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4184 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4185 4186 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4187 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4188 4189 /* determine row ownership */ 4190 /*---------------------------------------------------------*/ 4191 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4192 merge->rowmap.n = m; 4193 merge->rowmap.N = M; 4194 merge->rowmap.bs = 1; 4195 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4196 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4197 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4198 4199 m = merge->rowmap.n; 4200 M = merge->rowmap.N; 4201 owners = merge->rowmap.range; 4202 4203 /* determine the number of messages to send, their lengths */ 4204 /*---------------------------------------------------------*/ 4205 len_s = merge->len_s; 4206 4207 len = 0; /* length of buf_si[] */ 4208 merge->nsend = 0; 4209 for (proc=0; proc<size; proc++){ 4210 len_si[proc] = 0; 4211 if (proc == rank){ 4212 len_s[proc] = 0; 4213 } else { 4214 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4215 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4216 } 4217 if (len_s[proc]) { 4218 merge->nsend++; 4219 nrows = 0; 4220 for (i=owners[proc]; i<owners[proc+1]; i++){ 4221 if (ai[i+1] > ai[i]) nrows++; 4222 } 4223 len_si[proc] = 2*(nrows+1); 4224 len += len_si[proc]; 4225 } 4226 } 4227 4228 /* determine the number and length of messages to receive for ij-structure */ 4229 /*-------------------------------------------------------------------------*/ 4230 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4231 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4232 4233 /* post the Irecv of j-structure */ 4234 /*-------------------------------*/ 4235 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4236 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4237 4238 /* post the Isend of j-structure */ 4239 /*--------------------------------*/ 4240 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4241 sj_waits = si_waits + merge->nsend; 4242 4243 for (proc=0, k=0; proc<size; proc++){ 4244 if (!len_s[proc]) continue; 4245 i = owners[proc]; 4246 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4247 k++; 4248 } 4249 4250 /* receives and sends of j-structure are complete */ 4251 /*------------------------------------------------*/ 4252 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4253 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4254 4255 /* send and recv i-structure */ 4256 /*---------------------------*/ 4257 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4258 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4259 4260 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4261 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4262 for (proc=0,k=0; proc<size; proc++){ 4263 if (!len_s[proc]) continue; 4264 /* form outgoing message for i-structure: 4265 buf_si[0]: nrows to be sent 4266 [1:nrows]: row index (global) 4267 [nrows+1:2*nrows+1]: i-structure index 4268 */ 4269 /*-------------------------------------------*/ 4270 nrows = len_si[proc]/2 - 1; 4271 buf_si_i = buf_si + nrows+1; 4272 buf_si[0] = nrows; 4273 buf_si_i[0] = 0; 4274 nrows = 0; 4275 for (i=owners[proc]; i<owners[proc+1]; i++){ 4276 anzi = ai[i+1] - ai[i]; 4277 if (anzi) { 4278 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4279 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4280 nrows++; 4281 } 4282 } 4283 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4284 k++; 4285 buf_si += len_si[proc]; 4286 } 4287 4288 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4289 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4290 4291 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4292 for (i=0; i<merge->nrecv; i++){ 4293 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4294 } 4295 4296 ierr = PetscFree(len_si);CHKERRQ(ierr); 4297 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4298 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4299 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4300 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4301 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4302 ierr = PetscFree(status);CHKERRQ(ierr); 4303 4304 /* compute a local seq matrix in each processor */ 4305 /*----------------------------------------------*/ 4306 /* allocate bi array and free space for accumulating nonzero column info */ 4307 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4308 bi[0] = 0; 4309 4310 /* create and initialize a linked list */ 4311 nlnk = N+1; 4312 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4313 4314 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4315 len = 0; 4316 len = ai[owners[rank+1]] - ai[owners[rank]]; 4317 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4318 current_space = free_space; 4319 4320 /* determine symbolic info for each local row */ 4321 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4322 nextrow = buf_ri_k + merge->nrecv; 4323 nextai = nextrow + merge->nrecv; 4324 for (k=0; k<merge->nrecv; k++){ 4325 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4326 nrows = *buf_ri_k[k]; 4327 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4328 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4329 } 4330 4331 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4332 len = 0; 4333 for (i=0;i<m;i++) { 4334 bnzi = 0; 4335 /* add local non-zero cols of this proc's seqmat into lnk */ 4336 arow = owners[rank] + i; 4337 anzi = ai[arow+1] - ai[arow]; 4338 aj = a->j + ai[arow]; 4339 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4340 bnzi += nlnk; 4341 /* add received col data into lnk */ 4342 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4343 if (i == *nextrow[k]) { /* i-th row */ 4344 anzi = *(nextai[k]+1) - *nextai[k]; 4345 aj = buf_rj[k] + *nextai[k]; 4346 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4347 bnzi += nlnk; 4348 nextrow[k]++; nextai[k]++; 4349 } 4350 } 4351 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4352 4353 /* if free space is not available, make more free space */ 4354 if (current_space->local_remaining<bnzi) { 4355 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4356 nspacedouble++; 4357 } 4358 /* copy data into free space, then initialize lnk */ 4359 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4360 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4361 4362 current_space->array += bnzi; 4363 current_space->local_used += bnzi; 4364 current_space->local_remaining -= bnzi; 4365 4366 bi[i+1] = bi[i] + bnzi; 4367 } 4368 4369 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4370 4371 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4372 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4373 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4374 4375 /* create symbolic parallel matrix B_mpi */ 4376 /*---------------------------------------*/ 4377 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4378 if (n==PETSC_DECIDE) { 4379 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4380 } else { 4381 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4382 } 4383 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4384 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4385 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4386 4387 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4388 B_mpi->assembled = PETSC_FALSE; 4389 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4390 merge->bi = bi; 4391 merge->bj = bj; 4392 merge->buf_ri = buf_ri; 4393 merge->buf_rj = buf_rj; 4394 merge->coi = PETSC_NULL; 4395 merge->coj = PETSC_NULL; 4396 merge->owners_co = PETSC_NULL; 4397 4398 /* attach the supporting struct to B_mpi for reuse */ 4399 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4400 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4401 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4402 *mpimat = B_mpi; 4403 4404 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4405 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4406 PetscFunctionReturn(0); 4407 } 4408 4409 #undef __FUNCT__ 4410 #define __FUNCT__ "MatMerge_SeqsToMPI" 4411 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4412 { 4413 PetscErrorCode ierr; 4414 4415 PetscFunctionBegin; 4416 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4417 if (scall == MAT_INITIAL_MATRIX){ 4418 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4419 } 4420 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4421 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4422 PetscFunctionReturn(0); 4423 } 4424 4425 #undef __FUNCT__ 4426 #define __FUNCT__ "MatGetLocalMat" 4427 /*@ 4428 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4429 4430 Not Collective 4431 4432 Input Parameters: 4433 + A - the matrix 4434 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4435 4436 Output Parameter: 4437 . A_loc - the local sequential matrix generated 4438 4439 Level: developer 4440 4441 @*/ 4442 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4443 { 4444 PetscErrorCode ierr; 4445 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4446 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4447 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4448 MatScalar *aa=a->a,*ba=b->a,*cam; 4449 PetscScalar *ca; 4450 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4451 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4452 4453 PetscFunctionBegin; 4454 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4455 if (scall == MAT_INITIAL_MATRIX){ 4456 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4457 ci[0] = 0; 4458 for (i=0; i<am; i++){ 4459 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4460 } 4461 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4462 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4463 k = 0; 4464 for (i=0; i<am; i++) { 4465 ncols_o = bi[i+1] - bi[i]; 4466 ncols_d = ai[i+1] - ai[i]; 4467 /* off-diagonal portion of A */ 4468 for (jo=0; jo<ncols_o; jo++) { 4469 col = cmap[*bj]; 4470 if (col >= cstart) break; 4471 cj[k] = col; bj++; 4472 ca[k++] = *ba++; 4473 } 4474 /* diagonal portion of A */ 4475 for (j=0; j<ncols_d; j++) { 4476 cj[k] = cstart + *aj++; 4477 ca[k++] = *aa++; 4478 } 4479 /* off-diagonal portion of A */ 4480 for (j=jo; j<ncols_o; j++) { 4481 cj[k] = cmap[*bj++]; 4482 ca[k++] = *ba++; 4483 } 4484 } 4485 /* put together the new matrix */ 4486 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4487 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4488 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4489 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4490 mat->free_a = PETSC_TRUE; 4491 mat->free_ij = PETSC_TRUE; 4492 mat->nonew = 0; 4493 } else if (scall == MAT_REUSE_MATRIX){ 4494 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4495 ci = mat->i; cj = mat->j; cam = mat->a; 4496 for (i=0; i<am; i++) { 4497 /* off-diagonal portion of A */ 4498 ncols_o = bi[i+1] - bi[i]; 4499 for (jo=0; jo<ncols_o; jo++) { 4500 col = cmap[*bj]; 4501 if (col >= cstart) break; 4502 *cam++ = *ba++; bj++; 4503 } 4504 /* diagonal portion of A */ 4505 ncols_d = ai[i+1] - ai[i]; 4506 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4507 /* off-diagonal portion of A */ 4508 for (j=jo; j<ncols_o; j++) { 4509 *cam++ = *ba++; bj++; 4510 } 4511 } 4512 } else { 4513 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4514 } 4515 4516 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4517 PetscFunctionReturn(0); 4518 } 4519 4520 #undef __FUNCT__ 4521 #define __FUNCT__ "MatGetLocalMatCondensed" 4522 /*@C 4523 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4524 4525 Not Collective 4526 4527 Input Parameters: 4528 + A - the matrix 4529 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4530 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4531 4532 Output Parameter: 4533 . A_loc - the local sequential matrix generated 4534 4535 Level: developer 4536 4537 @*/ 4538 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4539 { 4540 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4541 PetscErrorCode ierr; 4542 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4543 IS isrowa,iscola; 4544 Mat *aloc; 4545 4546 PetscFunctionBegin; 4547 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4548 if (!row){ 4549 start = A->rmap->rstart; end = A->rmap->rend; 4550 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4551 } else { 4552 isrowa = *row; 4553 } 4554 if (!col){ 4555 start = A->cmap->rstart; 4556 cmap = a->garray; 4557 nzA = a->A->cmap->n; 4558 nzB = a->B->cmap->n; 4559 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4560 ncols = 0; 4561 for (i=0; i<nzB; i++) { 4562 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4563 else break; 4564 } 4565 imark = i; 4566 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4567 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4568 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4569 ierr = PetscFree(idx);CHKERRQ(ierr); 4570 } else { 4571 iscola = *col; 4572 } 4573 if (scall != MAT_INITIAL_MATRIX){ 4574 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4575 aloc[0] = *A_loc; 4576 } 4577 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4578 *A_loc = aloc[0]; 4579 ierr = PetscFree(aloc);CHKERRQ(ierr); 4580 if (!row){ 4581 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4582 } 4583 if (!col){ 4584 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4585 } 4586 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4587 PetscFunctionReturn(0); 4588 } 4589 4590 #undef __FUNCT__ 4591 #define __FUNCT__ "MatGetBrowsOfAcols" 4592 /*@C 4593 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4594 4595 Collective on Mat 4596 4597 Input Parameters: 4598 + A,B - the matrices in mpiaij format 4599 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4600 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4601 4602 Output Parameter: 4603 + rowb, colb - index sets of rows and columns of B to extract 4604 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4605 - B_seq - the sequential matrix generated 4606 4607 Level: developer 4608 4609 @*/ 4610 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4611 { 4612 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4613 PetscErrorCode ierr; 4614 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4615 IS isrowb,iscolb; 4616 Mat *bseq; 4617 4618 PetscFunctionBegin; 4619 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4620 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4621 } 4622 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4623 4624 if (scall == MAT_INITIAL_MATRIX){ 4625 start = A->cmap->rstart; 4626 cmap = a->garray; 4627 nzA = a->A->cmap->n; 4628 nzB = a->B->cmap->n; 4629 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4630 ncols = 0; 4631 for (i=0; i<nzB; i++) { /* row < local row index */ 4632 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4633 else break; 4634 } 4635 imark = i; 4636 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4637 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4638 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4639 ierr = PetscFree(idx);CHKERRQ(ierr); 4640 *brstart = imark; 4641 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4642 } else { 4643 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4644 isrowb = *rowb; iscolb = *colb; 4645 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4646 bseq[0] = *B_seq; 4647 } 4648 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4649 *B_seq = bseq[0]; 4650 ierr = PetscFree(bseq);CHKERRQ(ierr); 4651 if (!rowb){ 4652 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4653 } else { 4654 *rowb = isrowb; 4655 } 4656 if (!colb){ 4657 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4658 } else { 4659 *colb = iscolb; 4660 } 4661 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4662 PetscFunctionReturn(0); 4663 } 4664 4665 #undef __FUNCT__ 4666 #define __FUNCT__ "MatGetBrowsOfAoCols" 4667 /*@C 4668 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4669 of the OFF-DIAGONAL portion of local A 4670 4671 Collective on Mat 4672 4673 Input Parameters: 4674 + A,B - the matrices in mpiaij format 4675 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4676 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4677 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4678 4679 Output Parameter: 4680 + B_oth - the sequential matrix generated 4681 4682 Level: developer 4683 4684 @*/ 4685 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4686 { 4687 VecScatter_MPI_General *gen_to,*gen_from; 4688 PetscErrorCode ierr; 4689 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4690 Mat_SeqAIJ *b_oth; 4691 VecScatter ctx=a->Mvctx; 4692 MPI_Comm comm=((PetscObject)ctx)->comm; 4693 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4694 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4695 PetscScalar *rvalues,*svalues; 4696 MatScalar *b_otha,*bufa,*bufA; 4697 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4698 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4699 MPI_Status *sstatus,rstatus; 4700 PetscMPIInt jj; 4701 PetscInt *cols,sbs,rbs; 4702 PetscScalar *vals; 4703 4704 PetscFunctionBegin; 4705 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4706 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4707 } 4708 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4709 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4710 4711 gen_to = (VecScatter_MPI_General*)ctx->todata; 4712 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4713 rvalues = gen_from->values; /* holds the length of receiving row */ 4714 svalues = gen_to->values; /* holds the length of sending row */ 4715 nrecvs = gen_from->n; 4716 nsends = gen_to->n; 4717 4718 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4719 srow = gen_to->indices; /* local row index to be sent */ 4720 sstarts = gen_to->starts; 4721 sprocs = gen_to->procs; 4722 sstatus = gen_to->sstatus; 4723 sbs = gen_to->bs; 4724 rstarts = gen_from->starts; 4725 rprocs = gen_from->procs; 4726 rbs = gen_from->bs; 4727 4728 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4729 if (scall == MAT_INITIAL_MATRIX){ 4730 /* i-array */ 4731 /*---------*/ 4732 /* post receives */ 4733 for (i=0; i<nrecvs; i++){ 4734 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4735 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4736 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4737 } 4738 4739 /* pack the outgoing message */ 4740 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4741 rstartsj = sstartsj + nsends +1; 4742 sstartsj[0] = 0; rstartsj[0] = 0; 4743 len = 0; /* total length of j or a array to be sent */ 4744 k = 0; 4745 for (i=0; i<nsends; i++){ 4746 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4747 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4748 for (j=0; j<nrows; j++) { 4749 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4750 for (l=0; l<sbs; l++){ 4751 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4752 rowlen[j*sbs+l] = ncols; 4753 len += ncols; 4754 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4755 } 4756 k++; 4757 } 4758 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4759 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4760 } 4761 /* recvs and sends of i-array are completed */ 4762 i = nrecvs; 4763 while (i--) { 4764 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4765 } 4766 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4767 4768 /* allocate buffers for sending j and a arrays */ 4769 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4770 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4771 4772 /* create i-array of B_oth */ 4773 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4774 b_othi[0] = 0; 4775 len = 0; /* total length of j or a array to be received */ 4776 k = 0; 4777 for (i=0; i<nrecvs; i++){ 4778 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4779 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4780 for (j=0; j<nrows; j++) { 4781 b_othi[k+1] = b_othi[k] + rowlen[j]; 4782 len += rowlen[j]; k++; 4783 } 4784 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4785 } 4786 4787 /* allocate space for j and a arrrays of B_oth */ 4788 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4789 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4790 4791 /* j-array */ 4792 /*---------*/ 4793 /* post receives of j-array */ 4794 for (i=0; i<nrecvs; i++){ 4795 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4796 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4797 } 4798 4799 /* pack the outgoing message j-array */ 4800 k = 0; 4801 for (i=0; i<nsends; i++){ 4802 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4803 bufJ = bufj+sstartsj[i]; 4804 for (j=0; j<nrows; j++) { 4805 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4806 for (ll=0; ll<sbs; ll++){ 4807 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4808 for (l=0; l<ncols; l++){ 4809 *bufJ++ = cols[l]; 4810 } 4811 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4812 } 4813 } 4814 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4815 } 4816 4817 /* recvs and sends of j-array are completed */ 4818 i = nrecvs; 4819 while (i--) { 4820 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4821 } 4822 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4823 } else if (scall == MAT_REUSE_MATRIX){ 4824 sstartsj = *startsj; 4825 rstartsj = sstartsj + nsends +1; 4826 bufa = *bufa_ptr; 4827 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4828 b_otha = b_oth->a; 4829 } else { 4830 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4831 } 4832 4833 /* a-array */ 4834 /*---------*/ 4835 /* post receives of a-array */ 4836 for (i=0; i<nrecvs; i++){ 4837 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4838 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4839 } 4840 4841 /* pack the outgoing message a-array */ 4842 k = 0; 4843 for (i=0; i<nsends; i++){ 4844 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4845 bufA = bufa+sstartsj[i]; 4846 for (j=0; j<nrows; j++) { 4847 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4848 for (ll=0; ll<sbs; ll++){ 4849 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4850 for (l=0; l<ncols; l++){ 4851 *bufA++ = vals[l]; 4852 } 4853 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4854 } 4855 } 4856 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4857 } 4858 /* recvs and sends of a-array are completed */ 4859 i = nrecvs; 4860 while (i--) { 4861 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4862 } 4863 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4864 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4865 4866 if (scall == MAT_INITIAL_MATRIX){ 4867 /* put together the new matrix */ 4868 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4869 4870 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4871 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4872 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4873 b_oth->free_a = PETSC_TRUE; 4874 b_oth->free_ij = PETSC_TRUE; 4875 b_oth->nonew = 0; 4876 4877 ierr = PetscFree(bufj);CHKERRQ(ierr); 4878 if (!startsj || !bufa_ptr){ 4879 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4880 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4881 } else { 4882 *startsj = sstartsj; 4883 *bufa_ptr = bufa; 4884 } 4885 } 4886 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4887 PetscFunctionReturn(0); 4888 } 4889 4890 #undef __FUNCT__ 4891 #define __FUNCT__ "MatGetCommunicationStructs" 4892 /*@C 4893 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4894 4895 Not Collective 4896 4897 Input Parameters: 4898 . A - The matrix in mpiaij format 4899 4900 Output Parameter: 4901 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4902 . colmap - A map from global column index to local index into lvec 4903 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4904 4905 Level: developer 4906 4907 @*/ 4908 #if defined (PETSC_USE_CTABLE) 4909 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4910 #else 4911 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4912 #endif 4913 { 4914 Mat_MPIAIJ *a; 4915 4916 PetscFunctionBegin; 4917 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4918 PetscValidPointer(lvec, 2) 4919 PetscValidPointer(colmap, 3) 4920 PetscValidPointer(multScatter, 4) 4921 a = (Mat_MPIAIJ *) A->data; 4922 if (lvec) *lvec = a->lvec; 4923 if (colmap) *colmap = a->colmap; 4924 if (multScatter) *multScatter = a->Mvctx; 4925 PetscFunctionReturn(0); 4926 } 4927 4928 EXTERN_C_BEGIN 4929 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4930 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4931 EXTERN_C_END 4932 4933 #include "../src/mat/impls/dense/mpi/mpidense.h" 4934 4935 #undef __FUNCT__ 4936 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4937 /* 4938 Computes (B'*A')' since computing B*A directly is untenable 4939 4940 n p p 4941 ( ) ( ) ( ) 4942 m ( A ) * n ( B ) = m ( C ) 4943 ( ) ( ) ( ) 4944 4945 */ 4946 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4947 { 4948 PetscErrorCode ierr; 4949 Mat At,Bt,Ct; 4950 4951 PetscFunctionBegin; 4952 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4953 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4954 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4955 ierr = MatDestroy(At);CHKERRQ(ierr); 4956 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4957 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4958 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4959 PetscFunctionReturn(0); 4960 } 4961 4962 #undef __FUNCT__ 4963 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4964 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4965 { 4966 PetscErrorCode ierr; 4967 PetscInt m=A->rmap->n,n=B->cmap->n; 4968 Mat Cmat; 4969 4970 PetscFunctionBegin; 4971 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4972 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4973 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4974 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4975 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4976 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4977 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4978 *C = Cmat; 4979 PetscFunctionReturn(0); 4980 } 4981 4982 /* ----------------------------------------------------------------*/ 4983 #undef __FUNCT__ 4984 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4985 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4986 { 4987 PetscErrorCode ierr; 4988 4989 PetscFunctionBegin; 4990 if (scall == MAT_INITIAL_MATRIX){ 4991 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4992 } 4993 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4994 PetscFunctionReturn(0); 4995 } 4996 4997 EXTERN_C_BEGIN 4998 #if defined(PETSC_HAVE_MUMPS) 4999 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 5000 #endif 5001 #if defined(PETSC_HAVE_PASTIX) 5002 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5003 #endif 5004 #if defined(PETSC_HAVE_SUPERLU_DIST) 5005 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5006 #endif 5007 #if defined(PETSC_HAVE_SPOOLES) 5008 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5009 #endif 5010 EXTERN_C_END 5011 5012 /*MC 5013 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5014 5015 Options Database Keys: 5016 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5017 5018 Level: beginner 5019 5020 .seealso: MatCreateMPIAIJ() 5021 M*/ 5022 5023 EXTERN_C_BEGIN 5024 #undef __FUNCT__ 5025 #define __FUNCT__ "MatCreate_MPIAIJ" 5026 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5027 { 5028 Mat_MPIAIJ *b; 5029 PetscErrorCode ierr; 5030 PetscMPIInt size; 5031 5032 PetscFunctionBegin; 5033 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5034 5035 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5036 B->data = (void*)b; 5037 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5038 B->rmap->bs = 1; 5039 B->assembled = PETSC_FALSE; 5040 B->mapping = 0; 5041 5042 B->insertmode = NOT_SET_VALUES; 5043 b->size = size; 5044 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5045 5046 /* build cache for off array entries formed */ 5047 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5048 b->donotstash = PETSC_FALSE; 5049 b->colmap = 0; 5050 b->garray = 0; 5051 b->roworiented = PETSC_TRUE; 5052 5053 /* stuff used for matrix vector multiply */ 5054 b->lvec = PETSC_NULL; 5055 b->Mvctx = PETSC_NULL; 5056 5057 /* stuff for MatGetRow() */ 5058 b->rowindices = 0; 5059 b->rowvalues = 0; 5060 b->getrowactive = PETSC_FALSE; 5061 5062 #if defined(PETSC_HAVE_SPOOLES) 5063 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C", 5064 "MatGetFactor_mpiaij_spooles", 5065 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5066 #endif 5067 #if defined(PETSC_HAVE_MUMPS) 5068 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C", 5069 "MatGetFactor_mpiaij_mumps", 5070 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5071 #endif 5072 #if defined(PETSC_HAVE_PASTIX) 5073 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C", 5074 "MatGetFactor_mpiaij_pastix", 5075 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5076 #endif 5077 #if defined(PETSC_HAVE_SUPERLU_DIST) 5078 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C", 5079 "MatGetFactor_mpiaij_superlu_dist", 5080 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5081 #endif 5082 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5083 "MatStoreValues_MPIAIJ", 5084 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5085 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5086 "MatRetrieveValues_MPIAIJ", 5087 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5088 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5089 "MatGetDiagonalBlock_MPIAIJ", 5090 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5091 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5092 "MatIsTranspose_MPIAIJ", 5093 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5094 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5095 "MatMPIAIJSetPreallocation_MPIAIJ", 5096 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5097 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5098 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5099 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5100 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5101 "MatDiagonalScaleLocal_MPIAIJ", 5102 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5103 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5104 "MatConvert_MPIAIJ_MPICSRPERM", 5105 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5106 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5107 "MatConvert_MPIAIJ_MPICRL", 5108 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5109 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5110 "MatMatMult_MPIDense_MPIAIJ", 5111 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5112 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5113 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5114 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5115 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5116 "MatMatMultNumeric_MPIDense_MPIAIJ", 5117 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5118 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5119 PetscFunctionReturn(0); 5120 } 5121 EXTERN_C_END 5122 5123 #undef __FUNCT__ 5124 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5125 /*@ 5126 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5127 and "off-diagonal" part of the matrix in CSR format. 5128 5129 Collective on MPI_Comm 5130 5131 Input Parameters: 5132 + comm - MPI communicator 5133 . m - number of local rows (Cannot be PETSC_DECIDE) 5134 . n - This value should be the same as the local size used in creating the 5135 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5136 calculated if N is given) For square matrices n is almost always m. 5137 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5138 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5139 . i - row indices for "diagonal" portion of matrix 5140 . j - column indices 5141 . a - matrix values 5142 . oi - row indices for "off-diagonal" portion of matrix 5143 . oj - column indices 5144 - oa - matrix values 5145 5146 Output Parameter: 5147 . mat - the matrix 5148 5149 Level: advanced 5150 5151 Notes: 5152 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5153 5154 The i and j indices are 0 based 5155 5156 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5157 5158 5159 .keywords: matrix, aij, compressed row, sparse, parallel 5160 5161 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5162 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5163 @*/ 5164 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5165 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5166 { 5167 PetscErrorCode ierr; 5168 Mat_MPIAIJ *maij; 5169 5170 PetscFunctionBegin; 5171 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5172 if (i[0]) { 5173 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5174 } 5175 if (oi[0]) { 5176 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5177 } 5178 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5179 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5180 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5181 maij = (Mat_MPIAIJ*) (*mat)->data; 5182 maij->donotstash = PETSC_TRUE; 5183 (*mat)->preallocated = PETSC_TRUE; 5184 5185 ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5186 ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5187 ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr); 5188 ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr); 5189 5190 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5191 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5192 5193 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5194 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5195 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5196 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5197 5198 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5199 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5200 PetscFunctionReturn(0); 5201 } 5202 5203 /* 5204 Special version for direct calls from Fortran 5205 */ 5206 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5207 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5208 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5209 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5210 #endif 5211 5212 /* Change these macros so can be used in void function */ 5213 #undef CHKERRQ 5214 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5215 #undef SETERRQ2 5216 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5217 #undef SETERRQ 5218 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5219 5220 EXTERN_C_BEGIN 5221 #undef __FUNCT__ 5222 #define __FUNCT__ "matsetvaluesmpiaij_" 5223 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5224 { 5225 Mat mat = *mmat; 5226 PetscInt m = *mm, n = *mn; 5227 InsertMode addv = *maddv; 5228 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5229 PetscScalar value; 5230 PetscErrorCode ierr; 5231 5232 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5233 if (mat->insertmode == NOT_SET_VALUES) { 5234 mat->insertmode = addv; 5235 } 5236 #if defined(PETSC_USE_DEBUG) 5237 else if (mat->insertmode != addv) { 5238 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5239 } 5240 #endif 5241 { 5242 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5243 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5244 PetscTruth roworiented = aij->roworiented; 5245 5246 /* Some Variables required in the macro */ 5247 Mat A = aij->A; 5248 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5249 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5250 MatScalar *aa = a->a; 5251 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5252 Mat B = aij->B; 5253 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5254 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5255 MatScalar *ba = b->a; 5256 5257 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5258 PetscInt nonew = a->nonew; 5259 MatScalar *ap1,*ap2; 5260 5261 PetscFunctionBegin; 5262 for (i=0; i<m; i++) { 5263 if (im[i] < 0) continue; 5264 #if defined(PETSC_USE_DEBUG) 5265 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5266 #endif 5267 if (im[i] >= rstart && im[i] < rend) { 5268 row = im[i] - rstart; 5269 lastcol1 = -1; 5270 rp1 = aj + ai[row]; 5271 ap1 = aa + ai[row]; 5272 rmax1 = aimax[row]; 5273 nrow1 = ailen[row]; 5274 low1 = 0; 5275 high1 = nrow1; 5276 lastcol2 = -1; 5277 rp2 = bj + bi[row]; 5278 ap2 = ba + bi[row]; 5279 rmax2 = bimax[row]; 5280 nrow2 = bilen[row]; 5281 low2 = 0; 5282 high2 = nrow2; 5283 5284 for (j=0; j<n; j++) { 5285 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5286 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5287 if (in[j] >= cstart && in[j] < cend){ 5288 col = in[j] - cstart; 5289 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5290 } else if (in[j] < 0) continue; 5291 #if defined(PETSC_USE_DEBUG) 5292 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5293 #endif 5294 else { 5295 if (mat->was_assembled) { 5296 if (!aij->colmap) { 5297 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5298 } 5299 #if defined (PETSC_USE_CTABLE) 5300 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5301 col--; 5302 #else 5303 col = aij->colmap[in[j]] - 1; 5304 #endif 5305 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5306 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5307 col = in[j]; 5308 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5309 B = aij->B; 5310 b = (Mat_SeqAIJ*)B->data; 5311 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5312 rp2 = bj + bi[row]; 5313 ap2 = ba + bi[row]; 5314 rmax2 = bimax[row]; 5315 nrow2 = bilen[row]; 5316 low2 = 0; 5317 high2 = nrow2; 5318 bm = aij->B->rmap->n; 5319 ba = b->a; 5320 } 5321 } else col = in[j]; 5322 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5323 } 5324 } 5325 } else { 5326 if (!aij->donotstash) { 5327 if (roworiented) { 5328 if (ignorezeroentries && v[i*n] == 0.0) continue; 5329 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 5330 } else { 5331 if (ignorezeroentries && v[i] == 0.0) continue; 5332 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5333 } 5334 } 5335 } 5336 }} 5337 PetscFunctionReturnVoid(); 5338 } 5339 EXTERN_C_END 5340 5341