1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "petscblaslapack.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 8 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 9 { 10 PetscErrorCode ierr; 11 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 12 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 13 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 14 const PetscInt *ia,*ib; 15 const MatScalar *aa,*bb; 16 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 17 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 18 19 PetscFunctionBegin; 20 *keptrows = 0; 21 ia = a->i; 22 ib = b->i; 23 for (i=0; i<m; i++) { 24 na = ia[i+1] - ia[i]; 25 nb = ib[i+1] - ib[i]; 26 if (!na && !nb) { 27 cnt++; 28 goto ok1; 29 } 30 aa = a->a + ia[i]; 31 for (j=0; j<na; j++) { 32 if (aa[j] != 0.0) goto ok1; 33 } 34 bb = b->a + ib[i]; 35 for (j=0; j <nb; j++) { 36 if (bb[j] != 0.0) goto ok1; 37 } 38 cnt++; 39 ok1:; 40 } 41 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 42 if (!n0rows) PetscFunctionReturn(0); 43 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 44 cnt = 0; 45 for (i=0; i<m; i++) { 46 na = ia[i+1] - ia[i]; 47 nb = ib[i+1] - ib[i]; 48 if (!na && !nb) continue; 49 aa = a->a + ia[i]; 50 for(j=0; j<na;j++) { 51 if (aa[j] != 0.0) { 52 rows[cnt++] = rstart + i; 53 goto ok2; 54 } 55 } 56 bb = b->a + ib[i]; 57 for (j=0; j<nb; j++) { 58 if (bb[j] != 0.0) { 59 rows[cnt++] = rstart + i; 60 goto ok2; 61 } 62 } 63 ok2:; 64 } 65 ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 #undef __FUNCT__ 70 #define __FUNCT__ "MatDistribute_MPIAIJ" 71 /* 72 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 73 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
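   As a rough sketch of the message protocol used below (illustrative only, not
   part of the original source; i0 is shorthand for gmata->i[rowners[i]]):
   rank 0 walks the ownership ranges and, for each other rank i, sends

       MPI_Send(gmata->ilen + rowners[i], rowners[i+1]-rowners[i], MPIU_INT,    i, tag, comm);
       MPI_Send(&nz,                      1,                       MPIU_INT,    i, tag, comm);
       MPI_Send(gmata->j + i0,            nz,                      MPIU_INT,    i, tag, comm);
       MPI_Send(gmata->a + i0,            nz,                      MPIU_SCALAR, i, tag, comm);

   while every other rank receives the same messages in order, splits the row
   lengths into diagonal/off-diagonal counts, preallocates, and inserts its rows
   with MatSetValues().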
74 75 Only for square matrices 76 */ 77 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 78 { 79 PetscMPIInt rank,size; 80 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 81 PetscErrorCode ierr; 82 Mat mat; 83 Mat_SeqAIJ *gmata; 84 PetscMPIInt tag; 85 MPI_Status status; 86 PetscBool aij; 87 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 88 89 PetscFunctionBegin; 90 CHKMEMQ; 91 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 92 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 93 if (!rank) { 94 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 95 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 96 } 97 if (reuse == MAT_INITIAL_MATRIX) { 98 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 99 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 100 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 101 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 102 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 103 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 104 rowners[0] = 0; 105 for (i=2; i<=size; i++) { 106 rowners[i] += rowners[i-1]; 107 } 108 rstart = rowners[rank]; 109 rend = rowners[rank+1]; 110 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 111 if (!rank) { 112 gmata = (Mat_SeqAIJ*) gmat->data; 113 /* send row lengths to all processors */ 114 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 115 for (i=1; i<size; i++) { 116 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 117 } 118 /* determine number diagonal and off-diagonal counts */ 119 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 120 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 121 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 122 jj = 0; 123 for (i=0; i<m; i++) { 124 for (j=0; j<dlens[i]; j++) { 125 if (gmata->j[jj] < rstart) ld[i]++; 126 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 127 jj++; 128 } 129 } 130 /* send column indices to other processes */ 131 for (i=1; i<size; i++) { 132 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 133 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 134 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 135 } 136 137 /* send numerical values to other processes */ 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 gmataa = gmata->a; 143 gmataj = gmata->j; 144 145 } else { 146 /* receive row lengths */ 147 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 148 /* receive column indices */ 149 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 150 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 151 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 152 /* determine number diagonal and off-diagonal counts */ 153 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 154 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 155 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 156 jj = 0; 157 for (i=0; i<m; i++) { 158 for (j=0; j<dlens[i]; j++) { 159 if (gmataj[jj] < rstart) ld[i]++; 160 if (gmataj[jj] < rstart || gmataj[jj] >= rend) 
olens[i]++; 161 jj++; 162 } 163 } 164 /* receive numerical values */ 165 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 166 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 167 } 168 /* set preallocation */ 169 for (i=0; i<m; i++) { 170 dlens[i] -= olens[i]; 171 } 172 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 173 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 174 175 for (i=0; i<m; i++) { 176 dlens[i] += olens[i]; 177 } 178 cnt = 0; 179 for (i=0; i<m; i++) { 180 row = rstart + i; 181 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 182 cnt += dlens[i]; 183 } 184 if (rank) { 185 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 186 } 187 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 188 ierr = PetscFree(rowners);CHKERRQ(ierr); 189 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 190 *inmat = mat; 191 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 192 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 193 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 194 mat = *inmat; 195 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 196 if (!rank) { 197 /* send numerical values to other processes */ 198 gmata = (Mat_SeqAIJ*) gmat->data; 199 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 200 gmataa = gmata->a; 201 for (i=1; i<size; i++) { 202 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 203 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 204 } 205 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 206 } else { 207 /* receive numerical values from process 0*/ 208 nz = Ad->nz + Ao->nz; 209 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 210 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 211 } 212 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 213 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 214 ad = Ad->a; 215 ao = Ao->a; 216 if (mat->rmap->n) { 217 i = 0; 218 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 219 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 220 } 221 for (i=1; i<mat->rmap->n; i++) { 222 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 223 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 224 } 225 i--; 226 if (mat->rmap->n) { 227 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 228 } 229 if (rank) { 230 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 231 } 232 } 233 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 234 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 235 CHKMEMQ; 236 PetscFunctionReturn(0); 237 } 238 239 /* 240 Local utility routine that creates a mapping from the global column 241 number to the local number in the off-diagonal part of the local 242 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 243 a slightly higher hash table cost; without it it is not scalable (each processor 244 has an order N integer array but is fast to acess. 
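   A sketch of how consumers of this map look a column up (illustrative only;
   gcol stands for a hypothetical global column index that occurs in the
   off-diagonal part):

   #if defined(PETSC_USE_CTABLE)
       PetscInt lcol;
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;            the +1/-1 shift lets 0 act as the "not found" value
   #else
       PetscInt lcol = aij->colmap[gcol] - 1;
   #endif

   This is the lookup performed by MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()
   below.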
245 */ 246 #undef __FUNCT__ 247 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 248 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 249 { 250 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 251 PetscErrorCode ierr; 252 PetscInt n = aij->B->cmap->n,i; 253 254 PetscFunctionBegin; 255 #if defined (PETSC_USE_CTABLE) 256 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 257 for (i=0; i<n; i++){ 258 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 259 } 260 #else 261 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 262 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 263 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 264 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 265 #endif 266 PetscFunctionReturn(0); 267 } 268 269 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 270 { \ 271 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 272 lastcol1 = col;\ 273 while (high1-low1 > 5) { \ 274 t = (low1+high1)/2; \ 275 if (rp1[t] > col) high1 = t; \ 276 else low1 = t; \ 277 } \ 278 for (_i=low1; _i<high1; _i++) { \ 279 if (rp1[_i] > col) break; \ 280 if (rp1[_i] == col) { \ 281 if (addv == ADD_VALUES) ap1[_i] += value; \ 282 else ap1[_i] = value; \ 283 goto a_noinsert; \ 284 } \ 285 } \ 286 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 287 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 288 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 289 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 290 N = nrow1++ - 1; a->nz++; high1++; \ 291 /* shift up all the later entries in this row */ \ 292 for (ii=N; ii>=_i; ii--) { \ 293 rp1[ii+1] = rp1[ii]; \ 294 ap1[ii+1] = ap1[ii]; \ 295 } \ 296 rp1[_i] = col; \ 297 ap1[_i] = value; \ 298 a_noinsert: ; \ 299 ailen[row] = nrow1; \ 300 } 301 302 303 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 304 { \ 305 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 306 lastcol2 = col;\ 307 while (high2-low2 > 5) { \ 308 t = (low2+high2)/2; \ 309 if (rp2[t] > col) high2 = t; \ 310 else low2 = t; \ 311 } \ 312 for (_i=low2; _i<high2; _i++) { \ 313 if (rp2[_i] > col) break; \ 314 if (rp2[_i] == col) { \ 315 if (addv == ADD_VALUES) ap2[_i] += value; \ 316 else ap2[_i] = value; \ 317 goto b_noinsert; \ 318 } \ 319 } \ 320 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 321 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 322 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 323 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 324 N = nrow2++ - 1; b->nz++; high2++; \ 325 /* shift up all the later entries in this row */ \ 326 for (ii=N; ii>=_i; ii--) { \ 327 rp2[ii+1] = rp2[ii]; \ 328 ap2[ii+1] = ap2[ii]; \ 329 } \ 330 rp2[_i] = col; \ 331 ap2[_i] = value; \ 332 b_noinsert: ; \ 333 bilen[row] = nrow2; \ 334 } 335 336 #undef __FUNCT__ 337 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 338 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 339 { 340 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 341 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 342 PetscErrorCode ierr; 343 PetscInt l,*garray = mat->garray,diag; 344 345 PetscFunctionBegin; 346 /* code 
only works for square matrices A */ 347 348 /* find size of row to the left of the diagonal part */ 349 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 350 row = row - diag; 351 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 352 if (garray[b->j[b->i[row]+l]] > diag) break; 353 } 354 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 355 356 /* diagonal part */ 357 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 358 359 /* right of diagonal part */ 360 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 361 PetscFunctionReturn(0); 362 } 363 364 #undef __FUNCT__ 365 #define __FUNCT__ "MatSetValues_MPIAIJ" 366 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 367 { 368 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 369 PetscScalar value; 370 PetscErrorCode ierr; 371 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 372 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 373 PetscBool roworiented = aij->roworiented; 374 375 /* Some Variables required in the macro */ 376 Mat A = aij->A; 377 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 378 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 379 MatScalar *aa = a->a; 380 PetscBool ignorezeroentries = a->ignorezeroentries; 381 Mat B = aij->B; 382 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 383 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 384 MatScalar *ba = b->a; 385 386 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 387 PetscInt nonew = a->nonew; 388 MatScalar *ap1,*ap2; 389 390 PetscFunctionBegin; 391 if (v) PetscValidScalarPointer(v,6); 392 for (i=0; i<m; i++) { 393 if (im[i] < 0) continue; 394 #if defined(PETSC_USE_DEBUG) 395 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 396 #endif 397 if (im[i] >= rstart && im[i] < rend) { 398 row = im[i] - rstart; 399 lastcol1 = -1; 400 rp1 = aj + ai[row]; 401 ap1 = aa + ai[row]; 402 rmax1 = aimax[row]; 403 nrow1 = ailen[row]; 404 low1 = 0; 405 high1 = nrow1; 406 lastcol2 = -1; 407 rp2 = bj + bi[row]; 408 ap2 = ba + bi[row]; 409 rmax2 = bimax[row]; 410 nrow2 = bilen[row]; 411 low2 = 0; 412 high2 = nrow2; 413 414 for (j=0; j<n; j++) { 415 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 416 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 417 if (in[j] >= cstart && in[j] < cend){ 418 col = in[j] - cstart; 419 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 420 } else if (in[j] < 0) continue; 421 #if defined(PETSC_USE_DEBUG) 422 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 423 #endif 424 else { 425 if (mat->was_assembled) { 426 if (!aij->colmap) { 427 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 428 } 429 #if defined (PETSC_USE_CTABLE) 430 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 431 col--; 432 #else 433 col = aij->colmap[in[j]] - 1; 434 #endif 435 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 436 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 437 col = in[j]; 438 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 439 B = aij->B; 440 b = 
(Mat_SeqAIJ*)B->data; 441 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 442 rp2 = bj + bi[row]; 443 ap2 = ba + bi[row]; 444 rmax2 = bimax[row]; 445 nrow2 = bilen[row]; 446 low2 = 0; 447 high2 = nrow2; 448 bm = aij->B->rmap->n; 449 ba = b->a; 450 } 451 } else col = in[j]; 452 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 453 } 454 } 455 } else { 456 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 457 if (!aij->donotstash) { 458 if (roworiented) { 459 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 460 } else { 461 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 462 } 463 } 464 } 465 } 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatGetValues_MPIAIJ" 471 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 472 { 473 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 474 PetscErrorCode ierr; 475 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 476 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 477 478 PetscFunctionBegin; 479 for (i=0; i<m; i++) { 480 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 481 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 482 if (idxm[i] >= rstart && idxm[i] < rend) { 483 row = idxm[i] - rstart; 484 for (j=0; j<n; j++) { 485 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 486 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 487 if (idxn[j] >= cstart && idxn[j] < cend){ 488 col = idxn[j] - cstart; 489 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 490 } else { 491 if (!aij->colmap) { 492 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 493 } 494 #if defined (PETSC_USE_CTABLE) 495 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 496 col --; 497 #else 498 col = aij->colmap[idxn[j]] - 1; 499 #endif 500 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 501 else { 502 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 503 } 504 } 505 } 506 } else { 507 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 508 } 509 } 510 PetscFunctionReturn(0); 511 } 512 513 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 514 515 #undef __FUNCT__ 516 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 517 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 518 { 519 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 520 PetscErrorCode ierr; 521 PetscInt nstash,reallocs; 522 InsertMode addv; 523 524 PetscFunctionBegin; 525 if (aij->donotstash || mat->nooffprocentries) { 526 PetscFunctionReturn(0); 527 } 528 529 /* make sure all processors are either in INSERTMODE or ADDMODE */ 530 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 531 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 
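  /*
     A note on the check above (not in the original source): it assumes the usual
     PETSc encoding in which INSERT_VALUES and ADD_VALUES are distinct nonzero
     bit patterns (1 and 2), so the bitwise-OR reduction over all ranks equals
     (ADD_VALUES|INSERT_VALUES) exactly when some ranks inserted while others
     added, which is the mixed state that must be rejected before the stash is
     scattered.
  */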
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble.
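     The reduction below combines mat->was_assembled over all ranks with MPI_PROD,
     so the result is 1 only if every rank is still assembled; if this rank was
     assembled but the reduced value is 0, some rank has dropped back to the
     unassembled state, and this rank disassembles as well so that every rank
     rebuilds its off-diagonal structure consistently.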
*/ 578 /* 579 if nonzero structure of submatrix B cannot change then we know that 580 no processor disassembled thus we can skip this stuff 581 */ 582 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 583 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 584 if (mat->was_assembled && !other_disassembled) { 585 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 586 } 587 } 588 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 589 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 590 } 591 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 592 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 593 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 594 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 595 596 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 597 aij->rowvalues = 0; 598 599 /* used by MatAXPY() */ 600 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 601 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 602 603 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 604 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 605 PetscFunctionReturn(0); 606 } 607 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 610 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 611 { 612 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 613 PetscErrorCode ierr; 614 615 PetscFunctionBegin; 616 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 617 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 618 PetscFunctionReturn(0); 619 } 620 621 #undef __FUNCT__ 622 #define __FUNCT__ "MatZeroRows_MPIAIJ" 623 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 624 { 625 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 626 PetscErrorCode ierr; 627 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 628 PetscInt i,*owners = A->rmap->range; 629 PetscInt *nprocs,j,idx,nsends,row; 630 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 631 PetscInt *rvalues,count,base,slen,*source; 632 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 633 MPI_Comm comm = ((PetscObject)A)->comm; 634 MPI_Request *send_waits,*recv_waits; 635 MPI_Status recv_status,*send_status; 636 const PetscScalar *xx; 637 PetscScalar *bb; 638 #if defined(PETSC_DEBUG) 639 PetscBool found = PETSC_FALSE; 640 #endif 641 642 PetscFunctionBegin; 643 /* first count number of contributors to each processor */ 644 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 645 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 646 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 647 j = 0; 648 for (i=0; i<N; i++) { 649 if (lastidx > (idx = rows[i])) j = 0; 650 lastidx = idx; 651 for (; j<size; j++) { 652 if (idx >= owners[j] && idx < owners[j+1]) { 653 nprocs[2*j]++; 654 nprocs[2*j+1] = 1; 655 owner[i] = j; 656 #if defined(PETSC_DEBUG) 657 found = PETSC_TRUE; 658 #endif 659 break; 660 } 661 } 662 #if defined(PETSC_DEBUG) 663 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 664 found = PETSC_FALSE; 665 #endif 666 } 667 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 668 669 if (A->nooffproczerorows) { 670 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 671 nrecvs = 
nsends; 672 nmax = N; 673 } else { 674 /* inform other processors of number of messages and max length*/ 675 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 676 } 677 678 /* post receives: */ 679 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 680 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 681 for (i=0; i<nrecvs; i++) { 682 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 683 } 684 685 /* do sends: 686 1) starts[i] gives the starting index in svalues for stuff going to 687 the ith processor 688 */ 689 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 690 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 691 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 692 starts[0] = 0; 693 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 694 for (i=0; i<N; i++) { 695 svalues[starts[owner[i]]++] = rows[i]; 696 } 697 698 starts[0] = 0; 699 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 700 count = 0; 701 for (i=0; i<size; i++) { 702 if (nprocs[2*i+1]) { 703 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 704 } 705 } 706 ierr = PetscFree(starts);CHKERRQ(ierr); 707 708 base = owners[rank]; 709 710 /* wait on receives */ 711 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 712 count = nrecvs; slen = 0; 713 while (count) { 714 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 715 /* unpack receives into our local space */ 716 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 717 source[imdex] = recv_status.MPI_SOURCE; 718 lens[imdex] = n; 719 slen += n; 720 count--; 721 } 722 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 723 724 /* move the data into the send scatter */ 725 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 726 count = 0; 727 for (i=0; i<nrecvs; i++) { 728 values = rvalues + i*nmax; 729 for (j=0; j<lens[i]; j++) { 730 lrows[count++] = values[j] - base; 731 } 732 } 733 ierr = PetscFree(rvalues);CHKERRQ(ierr); 734 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 735 ierr = PetscFree(owner);CHKERRQ(ierr); 736 ierr = PetscFree(nprocs);CHKERRQ(ierr); 737 738 /* fix right hand side if needed */ 739 if (x && b) { 740 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 741 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 742 for (i=0; i<slen; i++) { 743 bb[lrows[i]] = diag*xx[lrows[i]]; 744 } 745 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 746 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 747 } 748 /* 749 Zero the required rows. If the "diagonal block" of the matrix 750 is square and the user wishes to set the diagonal we use separate 751 code so that MatSetValues() is not called for each diagonal allocating 752 new memory, thus calling lots of mallocs and slowing things down. 
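     From the caller's point of view the semantics implemented here are roughly
     (a sketch; A, nrows, rows, diag, x and b stand for the caller's data):

         ierr = MatZeroRows(A,nrows,rows,diag,x,b);CHKERRQ(ierr);

     each zeroed row keeps only the value diag on the diagonal (when the diagonal
     block is square), and when x and b are supplied the right-hand side is fixed
     up as b[row] = diag * x[row] for every zeroed local row.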
753 754 */ 755 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 756 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 757 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 758 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 759 } else if (diag != 0.0) { 760 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 761 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 762 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 763 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 764 } 765 for (i = 0; i < slen; i++) { 766 row = lrows[i] + rstart; 767 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 768 } 769 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 770 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 771 } else { 772 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 773 } 774 ierr = PetscFree(lrows);CHKERRQ(ierr); 775 776 /* wait on sends */ 777 if (nsends) { 778 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 779 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 780 ierr = PetscFree(send_status);CHKERRQ(ierr); 781 } 782 ierr = PetscFree(send_waits);CHKERRQ(ierr); 783 ierr = PetscFree(svalues);CHKERRQ(ierr); 784 785 PetscFunctionReturn(0); 786 } 787 788 #undef __FUNCT__ 789 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 790 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 791 { 792 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 793 PetscErrorCode ierr; 794 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 795 PetscInt i,*owners = A->rmap->range; 796 PetscInt *nprocs,j,idx,nsends; 797 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 798 PetscInt *rvalues,count,base,slen,*source; 799 PetscInt *lens,*lrows,*values,m; 800 MPI_Comm comm = ((PetscObject)A)->comm; 801 MPI_Request *send_waits,*recv_waits; 802 MPI_Status recv_status,*send_status; 803 const PetscScalar *xx; 804 PetscScalar *bb,*mask; 805 Vec xmask,lmask; 806 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 807 const PetscInt *aj, *ii,*ridx; 808 PetscScalar *aa; 809 #if defined(PETSC_DEBUG) 810 PetscBool found = PETSC_FALSE; 811 #endif 812 813 PetscFunctionBegin; 814 /* first count number of contributors to each processor */ 815 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 816 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 817 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 818 j = 0; 819 for (i=0; i<N; i++) { 820 if (lastidx > (idx = rows[i])) j = 0; 821 lastidx = idx; 822 for (; j<size; j++) { 823 if (idx >= owners[j] && idx < owners[j+1]) { 824 nprocs[2*j]++; 825 nprocs[2*j+1] = 1; 826 owner[i] = j; 827 #if defined(PETSC_DEBUG) 828 found = PETSC_TRUE; 829 #endif 830 break; 831 } 832 } 833 #if defined(PETSC_DEBUG) 834 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 835 found = PETSC_FALSE; 836 #endif 837 } 838 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 839 840 /* inform other processors of number of messages and max length*/ 841 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 842 843 /* post receives: */ 844 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 845 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 846 for (i=0; i<nrecvs; i++) { 847 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 848 } 849 850 /* do sends: 851 1) starts[i] gives the starting index in svalues for stuff going to 852 the ith processor 853 */ 854 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 855 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 856 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 857 starts[0] = 0; 858 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 859 for (i=0; i<N; i++) { 860 svalues[starts[owner[i]]++] = rows[i]; 861 } 862 863 starts[0] = 0; 864 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 865 count = 0; 866 for (i=0; i<size; i++) { 867 if (nprocs[2*i+1]) { 868 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 869 } 870 } 871 ierr = PetscFree(starts);CHKERRQ(ierr); 872 873 base = owners[rank]; 874 875 /* wait on receives */ 876 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 877 count = nrecvs; slen = 0; 878 while (count) { 879 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 880 /* unpack receives into our local space */ 881 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 882 source[imdex] = recv_status.MPI_SOURCE; 883 lens[imdex] = n; 884 slen += n; 885 count--; 886 } 887 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 888 889 /* move the data into the send scatter */ 890 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 891 count = 0; 892 for (i=0; i<nrecvs; i++) { 893 values = rvalues + i*nmax; 894 for (j=0; j<lens[i]; j++) { 895 lrows[count++] = values[j] - base; 896 } 897 } 898 ierr = PetscFree(rvalues);CHKERRQ(ierr); 899 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 900 ierr = PetscFree(owner);CHKERRQ(ierr); 901 ierr = PetscFree(nprocs);CHKERRQ(ierr); 902 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 903 904 /* zero diagonal part of matrix */ 905 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 906 907 /* handle off diagonal part of matrix */ 908 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 909 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 910 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 911 for (i=0; i<slen; i++) { 912 bb[lrows[i]] = 1; 913 } 914 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 915 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 ierr = VecDestroy(xmask);CHKERRQ(ierr); 918 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 919 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 920 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 921 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 922 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 923 924 /* remove zeroed rows of off diagonal matrix */ 925 ii = aij->i; 926 for (i=0; i<slen; i++) { 927 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 928 } 929 930 /* loop over all elements of off process part of matrix zeroing removed columns*/ 931 if (aij->compressedrow.use){ 932 m = aij->compressedrow.nrows; 933 ii = aij->compressedrow.i; 934 ridx = 
aij->compressedrow.rindex; 935 for (i=0; i<m; i++){ 936 n = ii[i+1] - ii[i]; 937 aj = aij->j + ii[i]; 938 aa = aij->a + ii[i]; 939 940 for (j=0; j<n; j++) { 941 if (PetscAbsScalar(mask[*aj])) { 942 bb[*ridx] -= *aa*xx[*aj]; 943 *aa = 0.0; 944 } 945 aa++; 946 aj++; 947 } 948 ridx++; 949 } 950 } else { /* do not use compressed row format */ 951 m = l->B->rmap->n; 952 for (i=0; i<m; i++) { 953 n = ii[i+1] - ii[i]; 954 aj = aij->j + ii[i]; 955 aa = aij->a + ii[i]; 956 for (j=0; j<n; j++) { 957 if (PetscAbsScalar(mask[*aj])) { 958 bb[i] -= *aa*xx[*aj]; 959 *aa = 0.0; 960 } 961 aa++; 962 aj++; 963 } 964 } 965 } 966 967 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 968 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 969 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 970 ierr = VecDestroy(lmask);CHKERRQ(ierr); 971 ierr = PetscFree(lrows);CHKERRQ(ierr); 972 973 /* wait on sends */ 974 if (nsends) { 975 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 976 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 977 ierr = PetscFree(send_status);CHKERRQ(ierr); 978 } 979 ierr = PetscFree(send_waits);CHKERRQ(ierr); 980 ierr = PetscFree(svalues);CHKERRQ(ierr); 981 982 PetscFunctionReturn(0); 983 } 984 985 #undef __FUNCT__ 986 #define __FUNCT__ "MatMult_MPIAIJ" 987 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 988 { 989 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 990 PetscErrorCode ierr; 991 PetscInt nt; 992 993 PetscFunctionBegin; 994 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 995 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 996 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 997 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 998 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 999 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1000 PetscFunctionReturn(0); 1001 } 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1005 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1006 { 1007 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1008 PetscErrorCode ierr; 1009 1010 PetscFunctionBegin; 1011 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1012 PetscFunctionReturn(0); 1013 } 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1017 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1018 { 1019 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1020 PetscErrorCode ierr; 1021 1022 PetscFunctionBegin; 1023 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1024 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1025 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1032 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1033 { 1034 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1035 PetscErrorCode ierr; 1036 PetscBool merged; 1037 1038 PetscFunctionBegin; 1039 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1040 /* do nondiagonal part */ 1041 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1042 if (!merged) { 1043 /* send it on its way */ 1044 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1045 /* do local 
part */ 1046 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1047 /* receive remote parts: note this assumes the values are not actually */ 1048 /* added in yy until the next line, */ 1049 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1050 } else { 1051 /* do local part */ 1052 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1053 /* send it on its way */ 1054 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1055 /* values actually were received in the Begin() but we need to call this nop */ 1056 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1057 } 1058 PetscFunctionReturn(0); 1059 } 1060 1061 EXTERN_C_BEGIN 1062 #undef __FUNCT__ 1063 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1064 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1065 { 1066 MPI_Comm comm; 1067 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1068 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1069 IS Me,Notme; 1070 PetscErrorCode ierr; 1071 PetscInt M,N,first,last,*notme,i; 1072 PetscMPIInt size; 1073 1074 PetscFunctionBegin; 1075 1076 /* Easy test: symmetric diagonal block */ 1077 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1078 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1079 if (!*f) PetscFunctionReturn(0); 1080 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1081 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1082 if (size == 1) PetscFunctionReturn(0); 1083 1084 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1085 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1086 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1087 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1088 for (i=0; i<first; i++) notme[i] = i; 1089 for (i=last; i<M; i++) notme[i-last+first] = i; 1090 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1091 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1092 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1093 Aoff = Aoffs[0]; 1094 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1095 Boff = Boffs[0]; 1096 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1097 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1098 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1099 ierr = ISDestroy(Me);CHKERRQ(ierr); 1100 ierr = ISDestroy(Notme);CHKERRQ(ierr); 1101 1102 PetscFunctionReturn(0); 1103 } 1104 EXTERN_C_END 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1108 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1109 { 1110 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1111 PetscErrorCode ierr; 1112 1113 PetscFunctionBegin; 1114 /* do nondiagonal part */ 1115 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1116 /* send it on its way */ 1117 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1118 /* do local part */ 1119 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1120 /* receive remote parts */ 1121 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 /* 1126 This only works correctly for square matrices where the subblock A->A is the 1127 diagonal block 1128 */ 1129 #undef __FUNCT__ 1130 #define __FUNCT__ 
"MatGetDiagonal_MPIAIJ" 1131 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1132 { 1133 PetscErrorCode ierr; 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 1136 PetscFunctionBegin; 1137 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1138 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1139 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "MatScale_MPIAIJ" 1145 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1146 { 1147 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1152 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatDestroy_MPIAIJ" 1158 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1159 { 1160 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1161 PetscErrorCode ierr; 1162 1163 PetscFunctionBegin; 1164 #if defined(PETSC_USE_LOG) 1165 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1166 #endif 1167 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1168 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 1169 if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);} 1170 if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);} 1171 #if defined (PETSC_USE_CTABLE) 1172 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 1173 #else 1174 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1175 #endif 1176 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1177 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 1178 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 1179 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1180 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1181 ierr = PetscFree(aij);CHKERRQ(ierr); 1182 1183 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1184 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1185 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1186 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1187 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1188 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1189 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1190 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1191 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1197 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1198 { 1199 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1200 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1201 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1202 PetscErrorCode ierr; 1203 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1204 int fd; 1205 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1206 PetscInt 
nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1207 PetscScalar *column_values; 1208 PetscInt message_count,flowcontrolcount; 1209 1210 PetscFunctionBegin; 1211 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1212 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1213 nz = A->nz + B->nz; 1214 if (!rank) { 1215 header[0] = MAT_FILE_CLASSID; 1216 header[1] = mat->rmap->N; 1217 header[2] = mat->cmap->N; 1218 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1219 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1220 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1221 /* get largest number of rows any processor has */ 1222 rlen = mat->rmap->n; 1223 range = mat->rmap->range; 1224 for (i=1; i<size; i++) { 1225 rlen = PetscMax(rlen,range[i+1] - range[i]); 1226 } 1227 } else { 1228 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1229 rlen = mat->rmap->n; 1230 } 1231 1232 /* load up the local row counts */ 1233 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1234 for (i=0; i<mat->rmap->n; i++) { 1235 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1236 } 1237 1238 /* store the row lengths to the file */ 1239 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1240 if (!rank) { 1241 MPI_Status status; 1242 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1243 for (i=1; i<size; i++) { 1244 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1245 rlen = range[i+1] - range[i]; 1246 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1247 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1248 } 1249 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1250 } else { 1251 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1252 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1253 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1254 } 1255 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1256 1257 /* load up the local column indices */ 1258 nzmax = nz; /* )th processor needs space a largest processor needs */ 1259 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1260 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1261 cnt = 0; 1262 for (i=0; i<mat->rmap->n; i++) { 1263 for (j=B->i[i]; j<B->i[i+1]; j++) { 1264 if ( (col = garray[B->j[j]]) > cstart) break; 1265 column_indices[cnt++] = col; 1266 } 1267 for (k=A->i[i]; k<A->i[i+1]; k++) { 1268 column_indices[cnt++] = A->j[k] + cstart; 1269 } 1270 for (; j<B->i[i+1]; j++) { 1271 column_indices[cnt++] = garray[B->j[j]]; 1272 } 1273 } 1274 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1275 1276 /* store the column indices to the file */ 1277 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1278 if (!rank) { 1279 MPI_Status status; 1280 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1281 for (i=1; i<size; i++) { 1282 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1283 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1284 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1285 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1286 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1287 } 1288 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1289 } else { 1290 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1291 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1292 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1293 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1294 } 1295 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1296 1297 /* load up the local column values */ 1298 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1299 cnt = 0; 1300 for (i=0; i<mat->rmap->n; i++) { 1301 for (j=B->i[i]; j<B->i[i+1]; j++) { 1302 if ( garray[B->j[j]] > cstart) break; 1303 column_values[cnt++] = B->a[j]; 1304 } 1305 for (k=A->i[i]; k<A->i[i+1]; k++) { 1306 column_values[cnt++] = A->a[k]; 1307 } 1308 for (; j<B->i[i+1]; j++) { 1309 column_values[cnt++] = B->a[j]; 1310 } 1311 } 1312 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1313 1314 /* store the column values to the file */ 1315 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1316 if (!rank) { 1317 MPI_Status status; 1318 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1319 for (i=1; i<size; i++) { 1320 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1321 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1322 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1323 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1324 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1325 } 1326 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1327 } else { 1328 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1329 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1330 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1331 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1332 } 1333 ierr = PetscFree(column_values);CHKERRQ(ierr); 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1339 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1340 { 1341 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1342 PetscErrorCode ierr; 1343 PetscMPIInt rank = aij->rank,size = aij->size; 1344 PetscBool isdraw,iascii,isbinary; 1345 PetscViewer sviewer; 1346 PetscViewerFormat format; 1347 1348 PetscFunctionBegin; 1349 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1350 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1351 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1352 if (iascii) { 1353 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1354 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1355 MatInfo info; 1356 PetscBool inodes; 1357 1358 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1359 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1360 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1361 if (!inodes) { 1362 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1363 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1364 } else { 1365 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1366 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1367 } 1368 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1369 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1370 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1371 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1372 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1373 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1374 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1377 PetscInt inodecount,inodelimit,*inodes; 1378 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1379 if (inodes) { 1380 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1381 } else { 1382 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1383 } 1384 PetscFunctionReturn(0); 1385 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1386 PetscFunctionReturn(0); 1387 } 1388 } else if (isbinary) { 1389 if (size == 1) { 1390 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1391 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1392 } else { 1393 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1394 } 1395 PetscFunctionReturn(0); 1396 } else if (isdraw) { 1397 PetscDraw draw; 1398 PetscBool isnull; 1399 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1400 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1401 } 1402 1403 if (size == 1) { 1404 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1405 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1406 } else { 1407 /* assemble the entire matrix onto first processor. 
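     This is done by building a temporary MATMPIAIJ matrix whose rows all live on
     rank 0: each rank copies its diagonal block with column indices shifted by
     cmap->rstart and its off-diagonal block with columns translated through
     garray[], and rank 0 then views the gathered sequential block through the
     singleton viewer.  Intended only for small matrices; output through this
     path is refused above 1024 rows unless -mat_ascii_output_large is given.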
*/ 1408 Mat A; 1409 Mat_SeqAIJ *Aloc; 1410 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1411 MatScalar *a; 1412 1413 if (mat->rmap->N > 1024) { 1414 PetscBool flg = PETSC_FALSE; 1415 1416 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1417 if (!flg) { 1418 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1419 } 1420 } 1421 1422 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1423 if (!rank) { 1424 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1425 } else { 1426 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1427 } 1428 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1429 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1430 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1431 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1432 1433 /* copy over the A part */ 1434 Aloc = (Mat_SeqAIJ*)aij->A->data; 1435 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1436 row = mat->rmap->rstart; 1437 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1438 for (i=0; i<m; i++) { 1439 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1440 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1441 } 1442 aj = Aloc->j; 1443 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1444 1445 /* copy over the B part */ 1446 Aloc = (Mat_SeqAIJ*)aij->B->data; 1447 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1448 row = mat->rmap->rstart; 1449 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1450 ct = cols; 1451 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1452 for (i=0; i<m; i++) { 1453 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1454 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1455 } 1456 ierr = PetscFree(ct);CHKERRQ(ierr); 1457 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1458 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1459 /* 1460 Everyone has to call to draw the matrix since the graphics waits are 1461 synchronized across all processors that share the PetscDraw object 1462 */ 1463 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1464 if (!rank) { 1465 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1466 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1467 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1468 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1469 } 1470 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1471 ierr = MatDestroy(A);CHKERRQ(ierr); 1472 } 1473 PetscFunctionReturn(0); 1474 } 1475 1476 #undef __FUNCT__ 1477 #define __FUNCT__ "MatView_MPIAIJ" 1478 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1479 { 1480 PetscErrorCode ierr; 1481 PetscBool iascii,isdraw,issocket,isbinary; 1482 1483 PetscFunctionBegin; 1484 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1485 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1486 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1487 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1488 if (iascii || isdraw || isbinary || issocket) { 1489 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1490 } else { 1491 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1492 } 1493 PetscFunctionReturn(0); 1494 } 1495 1496 #undef __FUNCT__ 1497 #define __FUNCT__ "MatSOR_MPIAIJ" 1498 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1499 { 1500 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1501 PetscErrorCode ierr; 1502 Vec bb1 = 0; 1503 PetscBool hasop; 1504 1505 PetscFunctionBegin; 1506 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1507 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1508 } 1509 1510 if (flag == SOR_APPLY_UPPER) { 1511 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1512 PetscFunctionReturn(0); 1513 } 1514 1515 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1516 if (flag & SOR_ZERO_INITIAL_GUESS) { 1517 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1518 its--; 1519 } 1520 1521 while (its--) { 1522 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1523 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1524 1525 /* update rhs: bb1 = bb - B*x */ 1526 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1527 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1528 1529 /* local sweep */ 1530 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1531 } 1532 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1533 if (flag & SOR_ZERO_INITIAL_GUESS) { 1534 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1535 its--; 1536 } 1537 while (its--) { 1538 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1539 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1540 1541 /* update rhs: bb1 = bb - B*x */ 1542 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1543 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1544 1545 /* local sweep */ 1546 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1547 } 1548 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1549 if (flag & SOR_ZERO_INITIAL_GUESS) { 1550 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1551 its--; 1552 } 1553 while (its--) { 1554 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1555 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1556 1557 /* update rhs: bb1 = bb - B*x */ 1558 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1559 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1560 1561 /* local sweep */ 1562 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1563 } 1564 } else if (flag & SOR_EISENSTAT) { 1565 Vec xx1; 1566 1567 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1568 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1569 1570 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1571 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1572 if (!mat->diag) { 1573 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1574 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1575 } 1576 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1577 if (hasop) { 1578 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1579 } else { 1580 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1581 } 1582 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1583 1584 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1585 1586 /* local sweep */ 1587 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1588 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1589 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1590 } else { 1591 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1592 } 1593 1594 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1595 PetscFunctionReturn(0); 1596 } 1597 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatPermute_MPIAIJ" 1600 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1601 { 1602 MPI_Comm comm,pcomm; 1603 PetscInt first,local_size,nrows; 1604 const PetscInt *rows; 1605 PetscMPIInt size; 1606 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1607 PetscErrorCode ierr; 1608 1609 PetscFunctionBegin; 1610 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1611 /* make a collective version of 'rowp' */ 1612 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1613 if (pcomm==comm) { 1614 crowp = rowp; 1615 } else { 1616 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1617 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1618 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 1619 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1620 } 1621 /* collect the global row permutation and invert it */ 1622 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1623 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1624 if (pcomm!=comm) { 1625 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1626 } 1627 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1628 /* get the local target indices */ 1629 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1630 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1631 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1632 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1633 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1634 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1635 /* the column permutation is so much easier; 1636 make a local version of 'colp' and invert it */ 1637 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1638 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1639 if (size==1) { 1640 lcolp = colp; 1641 } else { 1642 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1643 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1644 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 1645 } 1646 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1647 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1648 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1649 if (size>1) { 1650 ierr = 
ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1651 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1652 } 1653 /* now we just get the submatrix */ 1654 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1655 /* clean up */ 1656 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1657 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1658 PetscFunctionReturn(0); 1659 } 1660 1661 #undef __FUNCT__ 1662 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1663 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1664 { 1665 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1666 Mat A = mat->A,B = mat->B; 1667 PetscErrorCode ierr; 1668 PetscReal isend[5],irecv[5]; 1669 1670 PetscFunctionBegin; 1671 info->block_size = 1.0; 1672 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1673 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1674 isend[3] = info->memory; isend[4] = info->mallocs; 1675 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1676 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1677 isend[3] += info->memory; isend[4] += info->mallocs; 1678 if (flag == MAT_LOCAL) { 1679 info->nz_used = isend[0]; 1680 info->nz_allocated = isend[1]; 1681 info->nz_unneeded = isend[2]; 1682 info->memory = isend[3]; 1683 info->mallocs = isend[4]; 1684 } else if (flag == MAT_GLOBAL_MAX) { 1685 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1686 info->nz_used = irecv[0]; 1687 info->nz_allocated = irecv[1]; 1688 info->nz_unneeded = irecv[2]; 1689 info->memory = irecv[3]; 1690 info->mallocs = irecv[4]; 1691 } else if (flag == MAT_GLOBAL_SUM) { 1692 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1693 info->nz_used = irecv[0]; 1694 info->nz_allocated = irecv[1]; 1695 info->nz_unneeded = irecv[2]; 1696 info->memory = irecv[3]; 1697 info->mallocs = irecv[4]; 1698 } 1699 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1700 info->fill_ratio_needed = 0; 1701 info->factor_mallocs = 0; 1702 1703 PetscFunctionReturn(0); 1704 } 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatSetOption_MPIAIJ" 1708 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1709 { 1710 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1711 PetscErrorCode ierr; 1712 1713 PetscFunctionBegin; 1714 switch (op) { 1715 case MAT_NEW_NONZERO_LOCATIONS: 1716 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1717 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1718 case MAT_KEEP_NONZERO_PATTERN: 1719 case MAT_NEW_NONZERO_LOCATION_ERR: 1720 case MAT_USE_INODES: 1721 case MAT_IGNORE_ZERO_ENTRIES: 1722 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1723 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1724 break; 1725 case MAT_ROW_ORIENTED: 1726 a->roworiented = flg; 1727 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1728 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1729 break; 1730 case MAT_NEW_DIAGONALS: 1731 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1732 break; 1733 case MAT_IGNORE_OFF_PROC_ENTRIES: 1734 a->donotstash = PETSC_TRUE; 1735 break; 1736 case MAT_SPD: 1737 A->spd_set = PETSC_TRUE; 1738 A->spd = flg; 1739 if (flg) { 1740 A->symmetric = PETSC_TRUE; 1741 A->structurally_symmetric = PETSC_TRUE; 1742 A->symmetric_set = PETSC_TRUE; 1743 A->structurally_symmetric_set = PETSC_TRUE; 1744 } 1745 break; 1746 case MAT_SYMMETRIC: 1747 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1748 break; 1749 case 
MAT_STRUCTURALLY_SYMMETRIC: 1750 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1751 break; 1752 case MAT_HERMITIAN: 1753 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1754 break; 1755 case MAT_SYMMETRY_ETERNAL: 1756 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1757 break; 1758 default: 1759 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1760 } 1761 PetscFunctionReturn(0); 1762 } 1763 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "MatGetRow_MPIAIJ" 1766 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1767 { 1768 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1769 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1770 PetscErrorCode ierr; 1771 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1772 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1773 PetscInt *cmap,*idx_p; 1774 1775 PetscFunctionBegin; 1776 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1777 mat->getrowactive = PETSC_TRUE; 1778 1779 if (!mat->rowvalues && (idx || v)) { 1780 /* 1781 allocate enough space to hold information from the longest row. 1782 */ 1783 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1784 PetscInt max = 1,tmp; 1785 for (i=0; i<matin->rmap->n; i++) { 1786 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1787 if (max < tmp) { max = tmp; } 1788 } 1789 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1790 } 1791 1792 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1793 lrow = row - rstart; 1794 1795 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1796 if (!v) {pvA = 0; pvB = 0;} 1797 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1798 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1799 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1800 nztot = nzA + nzB; 1801 1802 cmap = mat->garray; 1803 if (v || idx) { 1804 if (nztot) { 1805 /* Sort by increasing column numbers, assuming A and B already sorted */ 1806 PetscInt imark = -1; 1807 if (v) { 1808 *v = v_p = mat->rowvalues; 1809 for (i=0; i<nzB; i++) { 1810 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1811 else break; 1812 } 1813 imark = i; 1814 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1815 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1816 } 1817 if (idx) { 1818 *idx = idx_p = mat->rowindices; 1819 if (imark > -1) { 1820 for (i=0; i<imark; i++) { 1821 idx_p[i] = cmap[cworkB[i]]; 1822 } 1823 } else { 1824 for (i=0; i<nzB; i++) { 1825 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1826 else break; 1827 } 1828 imark = i; 1829 } 1830 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1831 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1832 } 1833 } else { 1834 if (idx) *idx = 0; 1835 if (v) *v = 0; 1836 } 1837 } 1838 *nz = nztot; 1839 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1840 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1841 PetscFunctionReturn(0); 1842 } 1843 1844 #undef __FUNCT__ 1845 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1846 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1847 { 1848 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1849 1850 PetscFunctionBegin; 1851 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must 
be called first"); 1852 aij->getrowactive = PETSC_FALSE; 1853 PetscFunctionReturn(0); 1854 } 1855 1856 #undef __FUNCT__ 1857 #define __FUNCT__ "MatNorm_MPIAIJ" 1858 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1859 { 1860 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1861 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1862 PetscErrorCode ierr; 1863 PetscInt i,j,cstart = mat->cmap->rstart; 1864 PetscReal sum = 0.0; 1865 MatScalar *v; 1866 1867 PetscFunctionBegin; 1868 if (aij->size == 1) { 1869 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1870 } else { 1871 if (type == NORM_FROBENIUS) { 1872 v = amat->a; 1873 for (i=0; i<amat->nz; i++) { 1874 #if defined(PETSC_USE_COMPLEX) 1875 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1876 #else 1877 sum += (*v)*(*v); v++; 1878 #endif 1879 } 1880 v = bmat->a; 1881 for (i=0; i<bmat->nz; i++) { 1882 #if defined(PETSC_USE_COMPLEX) 1883 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1884 #else 1885 sum += (*v)*(*v); v++; 1886 #endif 1887 } 1888 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1889 *norm = sqrt(*norm); 1890 } else if (type == NORM_1) { /* max column norm */ 1891 PetscReal *tmp,*tmp2; 1892 PetscInt *jj,*garray = aij->garray; 1893 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1894 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1895 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1896 *norm = 0.0; 1897 v = amat->a; jj = amat->j; 1898 for (j=0; j<amat->nz; j++) { 1899 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1900 } 1901 v = bmat->a; jj = bmat->j; 1902 for (j=0; j<bmat->nz; j++) { 1903 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1904 } 1905 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1906 for (j=0; j<mat->cmap->N; j++) { 1907 if (tmp2[j] > *norm) *norm = tmp2[j]; 1908 } 1909 ierr = PetscFree(tmp);CHKERRQ(ierr); 1910 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1911 } else if (type == NORM_INFINITY) { /* max row norm */ 1912 PetscReal ntemp = 0.0; 1913 for (j=0; j<aij->A->rmap->n; j++) { 1914 v = amat->a + amat->i[j]; 1915 sum = 0.0; 1916 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1917 sum += PetscAbsScalar(*v); v++; 1918 } 1919 v = bmat->a + bmat->i[j]; 1920 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1921 sum += PetscAbsScalar(*v); v++; 1922 } 1923 if (sum > ntemp) ntemp = sum; 1924 } 1925 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1926 } else { 1927 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1928 } 1929 } 1930 PetscFunctionReturn(0); 1931 } 1932 1933 #undef __FUNCT__ 1934 #define __FUNCT__ "MatTranspose_MPIAIJ" 1935 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1936 { 1937 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1938 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1939 PetscErrorCode ierr; 1940 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1941 PetscInt cstart=A->cmap->rstart,ncol; 1942 Mat B; 1943 MatScalar *array; 1944 1945 PetscFunctionBegin; 1946 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1947 1948 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1949 ai = Aloc->i; aj = Aloc->j; 1950 bi = Bloc->i; bj = Bloc->j; 1951 if (reuse 
== MAT_INITIAL_MATRIX || *matout == A) { 1952 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1953 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1954 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1955 for (i=0; i<ai[ma]; i++){ 1956 d_nnz[aj[i]] ++; 1957 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1958 } 1959 1960 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1961 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1962 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1963 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1964 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1965 } else { 1966 B = *matout; 1967 } 1968 1969 /* copy over the A part */ 1970 array = Aloc->a; 1971 row = A->rmap->rstart; 1972 for (i=0; i<ma; i++) { 1973 ncol = ai[i+1]-ai[i]; 1974 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1975 row++; array += ncol; aj += ncol; 1976 } 1977 aj = Aloc->j; 1978 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1979 1980 /* copy over the B part */ 1981 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1982 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1983 array = Bloc->a; 1984 row = A->rmap->rstart; 1985 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1986 cols_tmp = cols; 1987 for (i=0; i<mb; i++) { 1988 ncol = bi[i+1]-bi[i]; 1989 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1990 row++; array += ncol; cols_tmp += ncol; 1991 } 1992 ierr = PetscFree(cols);CHKERRQ(ierr); 1993 1994 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1995 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1996 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1997 *matout = B; 1998 } else { 1999 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2000 } 2001 PetscFunctionReturn(0); 2002 } 2003 2004 #undef __FUNCT__ 2005 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2006 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2007 { 2008 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2009 Mat a = aij->A,b = aij->B; 2010 PetscErrorCode ierr; 2011 PetscInt s1,s2,s3; 2012 2013 PetscFunctionBegin; 2014 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2015 if (rr) { 2016 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2017 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2018 /* Overlap communication with computation. 
*/ 2019 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2020 } 2021 if (ll) { 2022 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2023 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2024 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2025 } 2026 /* scale the diagonal block */ 2027 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2028 2029 if (rr) { 2030 /* Do a scatter end and then right scale the off-diagonal block */ 2031 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2032 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2033 } 2034 2035 PetscFunctionReturn(0); 2036 } 2037 2038 #undef __FUNCT__ 2039 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2040 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2041 { 2042 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2043 PetscErrorCode ierr; 2044 2045 PetscFunctionBegin; 2046 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2047 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2048 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2049 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2050 PetscFunctionReturn(0); 2051 } 2052 #undef __FUNCT__ 2053 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2054 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2055 { 2056 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2057 PetscErrorCode ierr; 2058 2059 PetscFunctionBegin; 2060 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2061 PetscFunctionReturn(0); 2062 } 2063 2064 #undef __FUNCT__ 2065 #define __FUNCT__ "MatEqual_MPIAIJ" 2066 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2067 { 2068 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2069 Mat a,b,c,d; 2070 PetscBool flg; 2071 PetscErrorCode ierr; 2072 2073 PetscFunctionBegin; 2074 a = matA->A; b = matA->B; 2075 c = matB->A; d = matB->B; 2076 2077 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2078 if (flg) { 2079 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2080 } 2081 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2082 PetscFunctionReturn(0); 2083 } 2084 2085 #undef __FUNCT__ 2086 #define __FUNCT__ "MatCopy_MPIAIJ" 2087 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2088 { 2089 PetscErrorCode ierr; 2090 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2091 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2092 2093 PetscFunctionBegin; 2094 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2095 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2096 /* because of the column compression in the off-processor part of the matrix a->B, 2097 the number of columns in a->B and b->B may be different, hence we cannot call 2098 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2099 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2100 then copying the submatrices */ 2101 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2102 } else { 2103 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2104 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2105 } 2106 PetscFunctionReturn(0); 2107 } 2108 2109 #undef __FUNCT__ 2110 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2111 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2112 { 2113 PetscErrorCode ierr; 2114 2115 PetscFunctionBegin; 2116 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2117 PetscFunctionReturn(0); 2118 } 2119 2120 #undef __FUNCT__ 2121 #define __FUNCT__ "MatAXPY_MPIAIJ" 2122 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2123 { 2124 PetscErrorCode ierr; 2125 PetscInt i; 2126 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2127 PetscBLASInt bnz,one=1; 2128 Mat_SeqAIJ *x,*y; 2129 2130 PetscFunctionBegin; 2131 if (str == SAME_NONZERO_PATTERN) { 2132 PetscScalar alpha = a; 2133 x = (Mat_SeqAIJ *)xx->A->data; 2134 y = (Mat_SeqAIJ *)yy->A->data; 2135 bnz = PetscBLASIntCast(x->nz); 2136 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2137 x = (Mat_SeqAIJ *)xx->B->data; 2138 y = (Mat_SeqAIJ *)yy->B->data; 2139 bnz = PetscBLASIntCast(x->nz); 2140 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2141 } else if (str == SUBSET_NONZERO_PATTERN) { 2142 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2143 2144 x = (Mat_SeqAIJ *)xx->B->data; 2145 y = (Mat_SeqAIJ *)yy->B->data; 2146 if (y->xtoy && y->XtoY != xx->B) { 2147 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2148 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2149 } 2150 if (!y->xtoy) { /* get xtoy */ 2151 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2152 y->XtoY = xx->B; 2153 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2154 } 2155 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2156 } else { 2157 Mat B; 2158 PetscInt *nnz_d,*nnz_o; 2159 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2160 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2161 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2162 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2163 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2164 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2165 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2166 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2167 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 2168 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2169 ierr = MatHeaderReplace(Y,B); 2170 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2171 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2172 } 2173 PetscFunctionReturn(0); 2174 } 2175 2176 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2177 2178 #undef __FUNCT__ 2179 #define __FUNCT__ "MatConjugate_MPIAIJ" 2180 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2181 { 2182 #if defined(PETSC_USE_COMPLEX) 2183 PetscErrorCode ierr; 2184 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2185 2186 PetscFunctionBegin; 2187 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2188 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2189 #else 2190 PetscFunctionBegin; 2191 #endif 2192 PetscFunctionReturn(0); 
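/* for real scalars conjugation is the identity, so the non-complex branch above intentionally does nothing */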
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* stash the factored level graph on the factor matrix so MatSolve_MPIAIJ() can retrieve it */
  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
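/*
   With PBGL the factorization work happens entirely in MatILUFactorSymbolic_MPIAIJ() above:
   ilu_permuted() computes the level-0 ILU on the distributed graph and the result is composed
   onto the factor matrix as the "graph" container. Nothing is left for the numeric phase, so
   this routine simply returns; MatSolve_MPIAIJ() below retrieves the container and applies
   the factors through ilu_set_solve().
*/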
{ 2285 PetscFunctionBegin; 2286 PetscFunctionReturn(0); 2287 } 2288 2289 #undef __FUNCT__ 2290 #define __FUNCT__ "MatSolve_MPIAIJ" 2291 /* 2292 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2293 */ 2294 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2295 { 2296 namespace graph_dist = boost::graph::distributed; 2297 2298 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2299 lgraph_type* lgraph_p; 2300 PetscContainer c; 2301 PetscErrorCode ierr; 2302 2303 PetscFunctionBegin; 2304 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2305 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2306 ierr = VecCopy(b, x);CHKERRQ(ierr); 2307 2308 PetscScalar* array_x; 2309 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2310 PetscInt sx; 2311 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2312 2313 PetscScalar* array_b; 2314 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2315 PetscInt sb; 2316 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2317 2318 lgraph_type& level_graph = *lgraph_p; 2319 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2320 2321 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2322 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2323 ref_x(array_x, boost::extents[num_vertices(graph)]); 2324 2325 typedef boost::iterator_property_map<array_ref_type::iterator, 2326 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2327 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2328 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2329 2330 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2331 2332 PetscFunctionReturn(0); 2333 } 2334 #endif 2335 2336 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2337 PetscInt nzlocal,nsends,nrecvs; 2338 PetscMPIInt *send_rank,*recv_rank; 2339 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2340 PetscScalar *sbuf_a,**rbuf_a; 2341 PetscErrorCode (*MatDestroy)(Mat); 2342 } Mat_Redundant; 2343 2344 #undef __FUNCT__ 2345 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2346 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2347 { 2348 PetscErrorCode ierr; 2349 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2350 PetscInt i; 2351 2352 PetscFunctionBegin; 2353 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2354 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2355 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2356 for (i=0; i<redund->nrecvs; i++){ 2357 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2358 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2359 } 2360 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2361 ierr = PetscFree(redund);CHKERRQ(ierr); 2362 PetscFunctionReturn(0); 2363 } 2364 2365 #undef __FUNCT__ 2366 #define __FUNCT__ "MatDestroy_MatRedundant" 2367 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2368 { 2369 PetscErrorCode ierr; 2370 PetscContainer container; 2371 Mat_Redundant *redund=PETSC_NULL; 2372 2373 PetscFunctionBegin; 2374 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2375 if (container) { 2376 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2377 } else { 2378 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2379 } 2380 A->ops->destroy = redund->MatDestroy; 2381 ierr = 
PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2382 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2383 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2384 PetscFunctionReturn(0); 2385 } 2386 2387 #undef __FUNCT__ 2388 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2389 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2390 { 2391 PetscMPIInt rank,size; 2392 MPI_Comm comm=((PetscObject)mat)->comm; 2393 PetscErrorCode ierr; 2394 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2395 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2396 PetscInt *rowrange=mat->rmap->range; 2397 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2398 Mat A=aij->A,B=aij->B,C=*matredundant; 2399 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2400 PetscScalar *sbuf_a; 2401 PetscInt nzlocal=a->nz+b->nz; 2402 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2403 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2404 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2405 MatScalar *aworkA,*aworkB; 2406 PetscScalar *vals; 2407 PetscMPIInt tag1,tag2,tag3,imdex; 2408 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2409 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2410 MPI_Status recv_status,*send_status; 2411 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2412 PetscInt **rbuf_j=PETSC_NULL; 2413 PetscScalar **rbuf_a=PETSC_NULL; 2414 Mat_Redundant *redund=PETSC_NULL; 2415 PetscContainer container; 2416 2417 PetscFunctionBegin; 2418 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2419 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2420 2421 if (reuse == MAT_REUSE_MATRIX) { 2422 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2423 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2424 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2425 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2426 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2427 if (container) { 2428 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2429 } else { 2430 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2431 } 2432 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2433 2434 nsends = redund->nsends; 2435 nrecvs = redund->nrecvs; 2436 send_rank = redund->send_rank; 2437 recv_rank = redund->recv_rank; 2438 sbuf_nz = redund->sbuf_nz; 2439 rbuf_nz = redund->rbuf_nz; 2440 sbuf_j = redund->sbuf_j; 2441 sbuf_a = redund->sbuf_a; 2442 rbuf_j = redund->rbuf_j; 2443 rbuf_a = redund->rbuf_a; 2444 } 2445 2446 if (reuse == MAT_INITIAL_MATRIX){ 2447 PetscMPIInt subrank,subsize; 2448 PetscInt nleftover,np_subcomm; 2449 /* get the destination processors' id send_rank, nsends and nrecvs */ 2450 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2451 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2452 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2453 np_subcomm = size/nsubcomm; 2454 nleftover = size - nsubcomm*np_subcomm; 2455 nsends = 0; nrecvs = 0; 2456 for (i=0; i<size; i++){ /* i=rank*/ 2457 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2458 send_rank[nsends] = i; nsends++; 2459 recv_rank[nrecvs++] = i; 2460 } 2461 } 2462 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2463 i = size-nleftover-1; 2464 j = 0; 2465 while (j < nsubcomm - nleftover){ 2466 send_rank[nsends++] = i; 2467 i--; j++; 2468 } 2469 } 2470 2471 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2472 for (i=0; i<nleftover; i++){ 2473 recv_rank[nrecvs++] = size-nleftover+i; 2474 } 2475 } 2476 2477 /* allocate sbuf_j, sbuf_a */ 2478 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2479 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2480 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2481 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2482 2483 /* copy mat's local entries into the buffers */ 2484 if (reuse == MAT_INITIAL_MATRIX){ 2485 rownz_max = 0; 2486 rptr = sbuf_j; 2487 cols = sbuf_j + rend-rstart + 1; 2488 vals = sbuf_a; 2489 rptr[0] = 0; 2490 for (i=0; i<rend-rstart; i++){ 2491 row = i + rstart; 2492 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2493 ncols = nzA + nzB; 2494 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2495 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2496 /* load the column indices for this row into cols */ 2497 lwrite = 0; 2498 for (l=0; l<nzB; l++) { 2499 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2500 vals[lwrite] = aworkB[l]; 2501 cols[lwrite++] = ctmp; 2502 } 2503 } 2504 for (l=0; l<nzA; l++){ 2505 vals[lwrite] = aworkA[l]; 2506 cols[lwrite++] = cstart + cworkA[l]; 2507 } 2508 for (l=0; l<nzB; l++) { 2509 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2510 vals[lwrite] = aworkB[l]; 2511 cols[lwrite++] = ctmp; 2512 } 2513 } 2514 vals += ncols; 2515 cols += ncols; 2516 rptr[i+1] = rptr[i] + ncols; 2517 if (rownz_max < ncols) rownz_max = ncols; 2518 } 2519 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2520 } else { /* only copy matrix values into sbuf_a */ 2521 rptr = sbuf_j; 2522 vals = sbuf_a; 2523 rptr[0] = 0; 2524 for (i=0; i<rend-rstart; i++){ 2525 row = i + rstart; 2526 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2527 ncols = nzA + nzB; 2528 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2529 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2530 lwrite = 0; 2531 for (l=0; l<nzB; l++) { 2532 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2533 } 2534 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2535 for 
(l=0; l<nzB; l++) { 2536 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2537 } 2538 vals += ncols; 2539 rptr[i+1] = rptr[i] + ncols; 2540 } 2541 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2542 2543 /* send nzlocal to others, and recv other's nzlocal */ 2544 /*--------------------------------------------------*/ 2545 if (reuse == MAT_INITIAL_MATRIX){ 2546 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2547 s_waits2 = s_waits3 + nsends; 2548 s_waits1 = s_waits2 + nsends; 2549 r_waits1 = s_waits1 + nsends; 2550 r_waits2 = r_waits1 + nrecvs; 2551 r_waits3 = r_waits2 + nrecvs; 2552 } else { 2553 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2554 r_waits3 = s_waits3 + nsends; 2555 } 2556 2557 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2558 if (reuse == MAT_INITIAL_MATRIX){ 2559 /* get new tags to keep the communication clean */ 2560 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2561 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2562 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2563 2564 /* post receives of other's nzlocal */ 2565 for (i=0; i<nrecvs; i++){ 2566 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2567 } 2568 /* send nzlocal to others */ 2569 for (i=0; i<nsends; i++){ 2570 sbuf_nz[i] = nzlocal; 2571 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2572 } 2573 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2574 count = nrecvs; 2575 while (count) { 2576 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2577 recv_rank[imdex] = recv_status.MPI_SOURCE; 2578 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2579 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2580 2581 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2582 rbuf_nz[imdex] += i + 2; 2583 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2584 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2585 count--; 2586 } 2587 /* wait on sends of nzlocal */ 2588 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2589 /* send mat->i,j to others, and recv from other's */ 2590 /*------------------------------------------------*/ 2591 for (i=0; i<nsends; i++){ 2592 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2593 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2594 } 2595 /* wait on receives of mat->i,j */ 2596 /*------------------------------*/ 2597 count = nrecvs; 2598 while (count) { 2599 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2600 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2601 count--; 2602 } 2603 /* wait on sends of mat->i,j */ 2604 /*---------------------------*/ 2605 if (nsends) { 2606 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2607 } 2608 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2609 2610 /* post receives, send and receive mat->a */ 2611 
/*----------------------------------------*/ 2612 for (imdex=0; imdex<nrecvs; imdex++) { 2613 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2614 } 2615 for (i=0; i<nsends; i++){ 2616 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2617 } 2618 count = nrecvs; 2619 while (count) { 2620 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2621 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2622 count--; 2623 } 2624 if (nsends) { 2625 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2626 } 2627 2628 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2629 2630 /* create redundant matrix */ 2631 /*-------------------------*/ 2632 if (reuse == MAT_INITIAL_MATRIX){ 2633 /* compute rownz_max for preallocation */ 2634 for (imdex=0; imdex<nrecvs; imdex++){ 2635 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2636 rptr = rbuf_j[imdex]; 2637 for (i=0; i<j; i++){ 2638 ncols = rptr[i+1] - rptr[i]; 2639 if (rownz_max < ncols) rownz_max = ncols; 2640 } 2641 } 2642 2643 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2644 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2645 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2646 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2647 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2648 } else { 2649 C = *matredundant; 2650 } 2651 2652 /* insert local matrix entries */ 2653 rptr = sbuf_j; 2654 cols = sbuf_j + rend-rstart + 1; 2655 vals = sbuf_a; 2656 for (i=0; i<rend-rstart; i++){ 2657 row = i + rstart; 2658 ncols = rptr[i+1] - rptr[i]; 2659 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2660 vals += ncols; 2661 cols += ncols; 2662 } 2663 /* insert received matrix entries */ 2664 for (imdex=0; imdex<nrecvs; imdex++){ 2665 rstart = rowrange[recv_rank[imdex]]; 2666 rend = rowrange[recv_rank[imdex]+1]; 2667 rptr = rbuf_j[imdex]; 2668 cols = rbuf_j[imdex] + rend-rstart + 1; 2669 vals = rbuf_a[imdex]; 2670 for (i=0; i<rend-rstart; i++){ 2671 row = i + rstart; 2672 ncols = rptr[i+1] - rptr[i]; 2673 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2674 vals += ncols; 2675 cols += ncols; 2676 } 2677 } 2678 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2679 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2680 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2681 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2682 if (reuse == MAT_INITIAL_MATRIX){ 2683 PetscContainer container; 2684 *matredundant = C; 2685 /* create a supporting struct and attach it to C for reuse */ 2686 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2687 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2688 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2689 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2690 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2691 2692 redund->nzlocal = nzlocal; 2693 redund->nsends = nsends; 2694 redund->nrecvs = nrecvs; 2695 redund->send_rank = send_rank; 2696 redund->recv_rank = recv_rank; 
    redund->sbuf_nz = sbuf_nz;
    redund->rbuf_nz = rbuf_nz;
    redund->sbuf_j  = sbuf_j;
    redund->sbuf_a  = sbuf_a;
    redund->rbuf_j  = rbuf_j;
    redund->rbuf_a  = rbuf_a;

    redund->MatDestroy = C->ops->destroy;
    C->ops->destroy    = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  /* keep the larger of the diagonal-block and off-diagonal-block results for each local row */
  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV,
diagIdx);CHKERRQ(ierr); 2809 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2810 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2811 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2812 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2813 for(r = 0; r < n; ++r) { 2814 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2815 a[r] = diagA[r]; 2816 idx[r] = cstart + diagIdx[r]; 2817 } else { 2818 a[r] = offdiagA[r]; 2819 idx[r] = cmap[offdiagIdx[r]]; 2820 } 2821 } 2822 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2823 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2824 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2825 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2826 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2827 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2828 PetscFunctionReturn(0); 2829 } 2830 2831 #undef __FUNCT__ 2832 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2833 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2834 { 2835 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2836 PetscInt n = A->rmap->n; 2837 PetscInt cstart = A->cmap->rstart; 2838 PetscInt *cmap = mat->garray; 2839 PetscInt *diagIdx, *offdiagIdx; 2840 Vec diagV, offdiagV; 2841 PetscScalar *a, *diagA, *offdiagA; 2842 PetscInt r; 2843 PetscErrorCode ierr; 2844 2845 PetscFunctionBegin; 2846 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2847 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2848 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2849 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2850 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2851 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2852 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2853 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2854 for(r = 0; r < n; ++r) { 2855 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2856 a[r] = diagA[r]; 2857 idx[r] = cstart + diagIdx[r]; 2858 } else { 2859 a[r] = offdiagA[r]; 2860 idx[r] = cmap[offdiagIdx[r]]; 2861 } 2862 } 2863 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2864 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2865 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2866 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2867 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2868 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2869 PetscFunctionReturn(0); 2870 } 2871 2872 #undef __FUNCT__ 2873 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2874 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2875 { 2876 PetscErrorCode ierr; 2877 Mat *dummy; 2878 2879 PetscFunctionBegin; 2880 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2881 *newmat = *dummy; 2882 ierr = PetscFree(dummy);CHKERRQ(ierr); 2883 PetscFunctionReturn(0); 2884 } 2885 2886 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2887 /* -------------------------------------------------------------------*/ 2888 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2889 MatGetRow_MPIAIJ, 2890 MatRestoreRow_MPIAIJ, 2891 MatMult_MPIAIJ, 2892 /* 4*/ MatMultAdd_MPIAIJ, 2893 MatMultTranspose_MPIAIJ, 2894 MatMultTransposeAdd_MPIAIJ, 2895 #ifdef PETSC_HAVE_PBGL 2896 MatSolve_MPIAIJ, 2897 #else 2898 0, 2899 #endif 2900 0, 2901 0, 2902 /*10*/ 0, 2903 0, 2904 0, 2905 MatSOR_MPIAIJ, 2906 MatTranspose_MPIAIJ, 2907 /*15*/ MatGetInfo_MPIAIJ, 2908 MatEqual_MPIAIJ, 
2909 MatGetDiagonal_MPIAIJ, 2910 MatDiagonalScale_MPIAIJ, 2911 MatNorm_MPIAIJ, 2912 /*20*/ MatAssemblyBegin_MPIAIJ, 2913 MatAssemblyEnd_MPIAIJ, 2914 MatSetOption_MPIAIJ, 2915 MatZeroEntries_MPIAIJ, 2916 /*24*/ MatZeroRows_MPIAIJ, 2917 0, 2918 #ifdef PETSC_HAVE_PBGL 2919 0, 2920 #else 2921 0, 2922 #endif 2923 0, 2924 0, 2925 /*29*/ MatSetUpPreallocation_MPIAIJ, 2926 #ifdef PETSC_HAVE_PBGL 2927 0, 2928 #else 2929 0, 2930 #endif 2931 0, 2932 0, 2933 0, 2934 /*34*/ MatDuplicate_MPIAIJ, 2935 0, 2936 0, 2937 0, 2938 0, 2939 /*39*/ MatAXPY_MPIAIJ, 2940 MatGetSubMatrices_MPIAIJ, 2941 MatIncreaseOverlap_MPIAIJ, 2942 MatGetValues_MPIAIJ, 2943 MatCopy_MPIAIJ, 2944 /*44*/ MatGetRowMax_MPIAIJ, 2945 MatScale_MPIAIJ, 2946 0, 2947 0, 2948 MatZeroRowsColumns_MPIAIJ, 2949 /*49*/ MatSetBlockSize_MPIAIJ, 2950 0, 2951 0, 2952 0, 2953 0, 2954 /*54*/ MatFDColoringCreate_MPIAIJ, 2955 0, 2956 MatSetUnfactored_MPIAIJ, 2957 MatPermute_MPIAIJ, 2958 0, 2959 /*59*/ MatGetSubMatrix_MPIAIJ, 2960 MatDestroy_MPIAIJ, 2961 MatView_MPIAIJ, 2962 0, 2963 0, 2964 /*64*/ 0, 2965 0, 2966 0, 2967 0, 2968 0, 2969 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2970 MatGetRowMinAbs_MPIAIJ, 2971 0, 2972 MatSetColoring_MPIAIJ, 2973 #if defined(PETSC_HAVE_ADIC) 2974 MatSetValuesAdic_MPIAIJ, 2975 #else 2976 0, 2977 #endif 2978 MatSetValuesAdifor_MPIAIJ, 2979 /*75*/ MatFDColoringApply_AIJ, 2980 0, 2981 0, 2982 0, 2983 0, 2984 /*80*/ 0, 2985 0, 2986 0, 2987 /*83*/ MatLoad_MPIAIJ, 2988 0, 2989 0, 2990 0, 2991 0, 2992 0, 2993 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2994 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2995 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2996 MatPtAP_Basic, 2997 MatPtAPSymbolic_MPIAIJ, 2998 /*94*/ MatPtAPNumeric_MPIAIJ, 2999 0, 3000 0, 3001 0, 3002 0, 3003 /*99*/ 0, 3004 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3005 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3006 MatConjugate_MPIAIJ, 3007 0, 3008 /*104*/MatSetValuesRow_MPIAIJ, 3009 MatRealPart_MPIAIJ, 3010 MatImaginaryPart_MPIAIJ, 3011 0, 3012 0, 3013 /*109*/0, 3014 MatGetRedundantMatrix_MPIAIJ, 3015 MatGetRowMin_MPIAIJ, 3016 0, 3017 0, 3018 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 3019 0, 3020 0, 3021 0, 3022 0, 3023 /*119*/0, 3024 0, 3025 0, 3026 0, 3027 MatGetMultiProcBlock_MPIAIJ, 3028 /*124*/MatFindNonZeroRows_MPIAIJ, 3029 0, 3030 0, 3031 0, 3032 MatGetSubMatricesParallel_MPIAIJ 3033 }; 3034 3035 /* ----------------------------------------------------------------------------------------*/ 3036 3037 EXTERN_C_BEGIN 3038 #undef __FUNCT__ 3039 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3040 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3041 { 3042 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3043 PetscErrorCode ierr; 3044 3045 PetscFunctionBegin; 3046 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3047 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3048 PetscFunctionReturn(0); 3049 } 3050 EXTERN_C_END 3051 3052 EXTERN_C_BEGIN 3053 #undef __FUNCT__ 3054 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3055 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3056 { 3057 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3058 PetscErrorCode ierr; 3059 3060 PetscFunctionBegin; 3061 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3062 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3063 PetscFunctionReturn(0); 3064 } 3065 EXTERN_C_END 3066 3067 EXTERN_C_BEGIN 3068 #undef __FUNCT__ 3069 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3070 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3071 { 3072 Mat_MPIAIJ *b; 3073 PetscErrorCode ierr; 3074 PetscInt i; 3075 3076 
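/*
   d_nz/d_nnz preallocate the local diagonal block b->A (the columns owned by this process) and
   o_nz/o_nnz preallocate the off-diagonal block b->B (all remaining columns), as done at the
   end of this routine.

   A minimal caller-side sketch (illustrative only; the matrix name C and global size n are
   placeholders, and the per-row counts 5 and 2 are arbitrary):

     Mat C;
     ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr);
     ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(C,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(C,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ... MatSetValues() ... MatAssemblyBegin()/MatAssemblyEnd() ...
*/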
PetscFunctionBegin; 3077 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3078 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3079 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3080 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3081 3082 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3083 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3084 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3085 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3086 if (d_nnz) { 3087 for (i=0; i<B->rmap->n; i++) { 3088 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3089 } 3090 } 3091 if (o_nnz) { 3092 for (i=0; i<B->rmap->n; i++) { 3093 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3094 } 3095 } 3096 b = (Mat_MPIAIJ*)B->data; 3097 3098 if (!B->preallocated) { 3099 /* Explicitly create 2 MATSEQAIJ matrices. */ 3100 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3101 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3102 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3103 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3104 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3105 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3106 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3107 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3108 } 3109 3110 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3111 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3112 B->preallocated = PETSC_TRUE; 3113 PetscFunctionReturn(0); 3114 } 3115 EXTERN_C_END 3116 3117 #undef __FUNCT__ 3118 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3119 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3120 { 3121 Mat mat; 3122 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3123 PetscErrorCode ierr; 3124 3125 PetscFunctionBegin; 3126 *newmat = 0; 3127 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3128 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3129 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3130 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3131 a = (Mat_MPIAIJ*)mat->data; 3132 3133 mat->factortype = matin->factortype; 3134 mat->rmap->bs = matin->rmap->bs; 3135 mat->assembled = PETSC_TRUE; 3136 mat->insertmode = NOT_SET_VALUES; 3137 mat->preallocated = PETSC_TRUE; 3138 3139 a->size = oldmat->size; 3140 a->rank = oldmat->rank; 3141 a->donotstash = oldmat->donotstash; 3142 a->roworiented = oldmat->roworiented; 3143 a->rowindices = 0; 3144 a->rowvalues = 0; 3145 a->getrowactive = PETSC_FALSE; 3146 3147 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3148 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3149 3150 if (oldmat->colmap) { 3151 #if defined (PETSC_USE_CTABLE) 3152 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3153 #else 3154 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3155 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3156 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3157 #endif 3158 } else a->colmap = 0; 3159 if (oldmat->garray) { 3160 PetscInt len; 3161 len = oldmat->B->cmap->n; 3162 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3163 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3164 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3165 } else a->garray = 0; 3166 3167 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3168 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3169 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3170 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3171 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3172 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3173 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3174 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3175 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3176 *newmat = mat; 3177 PetscFunctionReturn(0); 3178 } 3179 3180 /* 3181 Allows sending/receiving larger messages then 2 gigabytes in a single call 3182 */ 3183 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm) 3184 { 3185 int ierr; 3186 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 3187 PetscInt i,numchunks; 3188 PetscMPIInt icnt; 3189 3190 numchunks = cnt/CHUNKSIZE + 1; 3191 for (i=0; i<numchunks; i++) { 3192 icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 3193 ierr = MPI_Send(mess,icnt,type,to,tag,comm);if (ierr) return ierr; 3194 if (type == MPIU_INT) { 3195 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 3196 } else if (type == MPIU_SCALAR) { 3197 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 3198 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 3199 } 3200 return 0; 3201 } 3202 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm) 3203 { 3204 int ierr; 3205 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 3206 MPI_Status status; 3207 PetscInt i,numchunks; 3208 PetscMPIInt icnt; 3209 3210 numchunks = cnt/CHUNKSIZE + 1; 3211 for (i=0; i<numchunks; i++) { 3212 icnt = PetscMPIIntCast((i < numchunks-1) ? 
CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 3213 ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);if (ierr) return ierr; 3214 if (type == MPIU_INT) { 3215 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 3216 } else if (type == MPIU_SCALAR) { 3217 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 3218 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 3219 } 3220 return 0; 3221 } 3222 3223 #undef __FUNCT__ 3224 #define __FUNCT__ "MatLoad_MPIAIJ" 3225 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3226 { 3227 PetscScalar *vals,*svals; 3228 MPI_Comm comm = ((PetscObject)viewer)->comm; 3229 PetscErrorCode ierr; 3230 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3231 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3232 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3233 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3234 PetscInt cend,cstart,n,*rowners,sizesset=1; 3235 int fd; 3236 3237 PetscFunctionBegin; 3238 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3239 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3240 if (!rank) { 3241 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3242 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3243 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3244 } 3245 3246 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3247 3248 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3249 M = header[1]; N = header[2]; 3250 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3251 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3252 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3253 3254 /* If global sizes are set, check if they are consistent with that given in the file */ 3255 if (sizesset) { 3256 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3257 } 3258 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3259 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3260 3261 /* determine ownership of all rows */ 3262 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3263 else m = newMat->rmap->n; /* Set by user */ 3264 3265 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3266 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3267 3268 /* First process needs enough room for process with most rows */ 3269 if (!rank) { 3270 mmax = rowners[1]; 3271 for (i=2; i<size; i++) { 3272 mmax = PetscMax(mmax,rowners[i]); 3273 } 3274 } else mmax = m; 3275 3276 rowners[0] = 0; 3277 for (i=2; i<=size; i++) { 3278 rowners[i] += rowners[i-1]; 3279 } 3280 rstart = rowners[rank]; 3281 rend = rowners[rank+1]; 3282 3283 /* distribute row lengths to all processors */ 3284 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3285 if (!rank) { 3286 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3287 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3288 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3289 ierr = 
PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3290 for (j=0; j<m; j++) { 3291 procsnz[0] += ourlens[j]; 3292 } 3293 for (i=1; i<size; i++) { 3294 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3295 /* calculate the number of nonzeros on each processor */ 3296 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3297 procsnz[i] += rowlengths[j]; 3298 } 3299 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3300 } 3301 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3302 } else { 3303 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3304 } 3305 3306 if (!rank) { 3307 /* determine max buffer needed and allocate it */ 3308 maxnz = 0; 3309 for (i=0; i<size; i++) { 3310 maxnz = PetscMax(maxnz,procsnz[i]); 3311 } 3312 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3313 3314 /* read in my part of the matrix column indices */ 3315 nz = procsnz[0]; 3316 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3317 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3318 3319 /* read in every one elses and ship off */ 3320 for (i=1; i<size; i++) { 3321 nz = procsnz[i]; 3322 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3323 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3324 } 3325 ierr = PetscFree(cols);CHKERRQ(ierr); 3326 } else { 3327 /* determine buffer space needed for message */ 3328 nz = 0; 3329 for (i=0; i<m; i++) { 3330 nz += ourlens[i]; 3331 } 3332 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3333 3334 /* receive message of column indices*/ 3335 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3336 } 3337 3338 /* determine column ownership if matrix is not square */ 3339 if (N != M) { 3340 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3341 else n = newMat->cmap->n; 3342 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3343 cstart = cend - n; 3344 } else { 3345 cstart = rstart; 3346 cend = rend; 3347 n = cend - cstart; 3348 } 3349 3350 /* loop over local rows, determining number of off diagonal entries */ 3351 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3352 jj = 0; 3353 for (i=0; i<m; i++) { 3354 for (j=0; j<ourlens[i]; j++) { 3355 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3356 jj++; 3357 } 3358 } 3359 3360 for (i=0; i<m; i++) { 3361 ourlens[i] -= offlens[i]; 3362 } 3363 if (!sizesset) { 3364 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3365 } 3366 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3367 3368 for (i=0; i<m; i++) { 3369 ourlens[i] += offlens[i]; 3370 } 3371 3372 if (!rank) { 3373 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3374 3375 /* read in my part of the matrix numerical values */ 3376 nz = procsnz[0]; 3377 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3378 3379 /* insert into matrix */ 3380 jj = rstart; 3381 smycols = mycols; 3382 svals = vals; 3383 for (i=0; i<m; i++) { 3384 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3385 smycols += ourlens[i]; 3386 svals += ourlens[i]; 3387 jj++; 3388 } 3389 3390 /* read in other processors and ship out */ 3391 for (i=1; i<size; i++) { 3392 nz = procsnz[i]; 3393 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3394 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3395 } 3396 ierr = 
PetscFree(procsnz);CHKERRQ(ierr); 3397 } else { 3398 /* receive numeric values */ 3399 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3400 3401 /* receive message of values*/ 3402 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3403 3404 /* insert into matrix */ 3405 jj = rstart; 3406 smycols = mycols; 3407 svals = vals; 3408 for (i=0; i<m; i++) { 3409 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3410 smycols += ourlens[i]; 3411 svals += ourlens[i]; 3412 jj++; 3413 } 3414 } 3415 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3416 ierr = PetscFree(vals);CHKERRQ(ierr); 3417 ierr = PetscFree(mycols);CHKERRQ(ierr); 3418 ierr = PetscFree(rowners);CHKERRQ(ierr); 3419 3420 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3421 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3422 PetscFunctionReturn(0); 3423 } 3424 3425 #undef __FUNCT__ 3426 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3427 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3428 { 3429 PetscErrorCode ierr; 3430 IS iscol_local; 3431 PetscInt csize; 3432 3433 PetscFunctionBegin; 3434 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3435 if (call == MAT_REUSE_MATRIX) { 3436 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3437 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3438 } else { 3439 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3440 } 3441 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3442 if (call == MAT_INITIAL_MATRIX) { 3443 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3444 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3445 } 3446 PetscFunctionReturn(0); 3447 } 3448 3449 #undef __FUNCT__ 3450 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3451 /* 3452 Not great since it makes two copies of the submatrix, first an SeqAIJ 3453 in local and then by concatenating the local matrices the end result. 3454 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3455 3456 Note: This requires a sequential iscol with all indices. 
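     A sketch of how this routine is typically reached (these two calls appear in MatGetSubMatrix_MPIAIJ()
     above and are shown here only for orientation): the distributed column index set is first gathered onto
     every process with ISAllGather(), and that gathered, sequential IS is what arrives here as iscol,

         ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
         ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);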
3457 */ 3458 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3459 { 3460 PetscErrorCode ierr; 3461 PetscMPIInt rank,size; 3462 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3463 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3464 Mat *local,M,Mreuse; 3465 MatScalar *vwork,*aa; 3466 MPI_Comm comm = ((PetscObject)mat)->comm; 3467 Mat_SeqAIJ *aij; 3468 3469 3470 PetscFunctionBegin; 3471 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3472 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3473 3474 if (call == MAT_REUSE_MATRIX) { 3475 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3476 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3477 local = &Mreuse; 3478 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3479 } else { 3480 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3481 Mreuse = *local; 3482 ierr = PetscFree(local);CHKERRQ(ierr); 3483 } 3484 3485 /* 3486 m - number of local rows 3487 n - number of columns (same on all processors) 3488 rstart - first row in new global matrix generated 3489 */ 3490 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3491 if (call == MAT_INITIAL_MATRIX) { 3492 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3493 ii = aij->i; 3494 jj = aij->j; 3495 3496 /* 3497 Determine the number of non-zeros in the diagonal and off-diagonal 3498 portions of the matrix in order to do correct preallocation 3499 */ 3500 3501 /* first get start and end of "diagonal" columns */ 3502 if (csize == PETSC_DECIDE) { 3503 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3504 if (mglobal == n) { /* square matrix */ 3505 nlocal = m; 3506 } else { 3507 nlocal = n/size + ((n % size) > rank); 3508 } 3509 } else { 3510 nlocal = csize; 3511 } 3512 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3513 rstart = rend - nlocal; 3514 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3515 3516 /* next, compute all the lengths */ 3517 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3518 olens = dlens + m; 3519 for (i=0; i<m; i++) { 3520 jend = ii[i+1] - ii[i]; 3521 olen = 0; 3522 dlen = 0; 3523 for (j=0; j<jend; j++) { 3524 if (*jj < rstart || *jj >= rend) olen++; 3525 else dlen++; 3526 jj++; 3527 } 3528 olens[i] = olen; 3529 dlens[i] = dlen; 3530 } 3531 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3532 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3533 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3534 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3535 ierr = PetscFree(dlens);CHKERRQ(ierr); 3536 } else { 3537 PetscInt ml,nl; 3538 3539 M = *newmat; 3540 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3541 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3542 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3543 /* 3544 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3545 rather than the slower MatSetValues(). 
3546 */ 3547 M->was_assembled = PETSC_TRUE; 3548 M->assembled = PETSC_FALSE; 3549 } 3550 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3551 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3552 ii = aij->i; 3553 jj = aij->j; 3554 aa = aij->a; 3555 for (i=0; i<m; i++) { 3556 row = rstart + i; 3557 nz = ii[i+1] - ii[i]; 3558 cwork = jj; jj += nz; 3559 vwork = aa; aa += nz; 3560 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3561 } 3562 3563 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3564 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3565 *newmat = M; 3566 3567 /* save submatrix used in processor for next request */ 3568 if (call == MAT_INITIAL_MATRIX) { 3569 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3570 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3571 } 3572 3573 PetscFunctionReturn(0); 3574 } 3575 3576 EXTERN_C_BEGIN 3577 #undef __FUNCT__ 3578 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3579 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3580 { 3581 PetscInt m,cstart, cend,j,nnz,i,d; 3582 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3583 const PetscInt *JJ; 3584 PetscScalar *values; 3585 PetscErrorCode ierr; 3586 3587 PetscFunctionBegin; 3588 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3589 3590 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3591 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3592 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3593 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3594 m = B->rmap->n; 3595 cstart = B->cmap->rstart; 3596 cend = B->cmap->rend; 3597 rstart = B->rmap->rstart; 3598 3599 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3600 3601 #if defined(PETSC_USE_DEBUG) 3602 for (i=0; i<m; i++) { 3603 nnz = Ii[i+1]- Ii[i]; 3604 JJ = J + Ii[i]; 3605 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3606 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3607 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3608 } 3609 #endif 3610 3611 for (i=0; i<m; i++) { 3612 nnz = Ii[i+1]- Ii[i]; 3613 JJ = J + Ii[i]; 3614 nnz_max = PetscMax(nnz_max,nnz); 3615 d = 0; 3616 for (j=0; j<nnz; j++) { 3617 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3618 } 3619 d_nnz[i] = d; 3620 o_nnz[i] = nnz - d; 3621 } 3622 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3623 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3624 3625 if (v) values = (PetscScalar*)v; 3626 else { 3627 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3628 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3629 } 3630 3631 for (i=0; i<m; i++) { 3632 ii = i + rstart; 3633 nnz = Ii[i+1]- Ii[i]; 3634 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3635 } 3636 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3637 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3638 3639 if (!v) { 3640 ierr = PetscFree(values);CHKERRQ(ierr); 3641 } 3642 PetscFunctionReturn(0); 3643 } 3644 EXTERN_C_END 3645 3646 #undef __FUNCT__ 3647 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3648 /*@ 3649 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3650 (the default parallel PETSc format). 3651 3652 Collective on MPI_Comm 3653 3654 Input Parameters: 3655 + B - the matrix 3656 . i - the indices into j for the start of each local row (starts with zero) 3657 . j - the column indices for each local row (starts with zero) 3658 - v - optional values in the matrix 3659 3660 Level: developer 3661 3662 Notes: 3663 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3664 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3665 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3666 3667 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3668 3669 The format which is used for the sparse matrix input, is equivalent to a 3670 row-major ordering.. i.e for the following matrix, the input data expected is 3671 as shown: 3672 3673 1 0 0 3674 2 0 3 P0 3675 ------- 3676 4 5 6 P1 3677 3678 Process0 [P0]: rows_owned=[0,1] 3679 i = {0,1,3} [size = nrow+1 = 2+1] 3680 j = {0,0,2} [size = nz = 6] 3681 v = {1,2,3} [size = nz = 6] 3682 3683 Process1 [P1]: rows_owned=[2] 3684 i = {0,3} [size = nrow+1 = 1+1] 3685 j = {0,1,2} [size = nz = 6] 3686 v = {4,5,6} [size = nz = 6] 3687 3688 .keywords: matrix, aij, compressed row, sparse, parallel 3689 3690 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3691 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3692 @*/ 3693 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3694 { 3695 PetscErrorCode ierr; 3696 3697 PetscFunctionBegin; 3698 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3699 PetscFunctionReturn(0); 3700 } 3701 3702 #undef __FUNCT__ 3703 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3704 /*@C 3705 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3706 (the default parallel PETSc format). For good matrix assembly performance 3707 the user should preallocate the matrix storage by setting the parameters 3708 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3709 performance can be increased by more than a factor of 50. 3710 3711 Collective on MPI_Comm 3712 3713 Input Parameters: 3714 + A - the matrix 3715 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3716 (same value is used for all local rows) 3717 . d_nnz - array containing the number of nonzeros in the various rows of the 3718 DIAGONAL portion of the local submatrix (possibly different for each row) 3719 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3720 The size of this array is equal to the number of local rows, i.e 'm'. 3721 You must leave room for the diagonal entry even if it is zero. 3722 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3723 submatrix (same value is used for all local rows). 3724 - o_nnz - array containing the number of nonzeros in the various rows of the 3725 OFF-DIAGONAL portion of the local submatrix (possibly different for 3726 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3727 structure. The size of this array is equal to the number 3728 of local rows, i.e 'm'. 3729 3730 If the *_nnz parameter is given then the *_nz parameter is ignored 3731 3732 The AIJ format (also called the Yale sparse matrix format or 3733 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3734 storage. The stored row and column indices begin with zero. 3735 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3736 3737 The parallel matrix is partitioned such that the first m0 rows belong to 3738 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3739 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3740 3741 The DIAGONAL portion of the local submatrix of a processor can be defined 3742 as the submatrix which is obtained by extraction the part corresponding to 3743 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3744 first row that belongs to the processor, r2 is the last row belonging to 3745 the this processor, and c1-c2 is range of indices of the local part of a 3746 vector suitable for applying the matrix to. This is an mxn matrix. In the 3747 common case of a square matrix, the row and column ranges are the same and 3748 the DIAGONAL part is also square. The remaining portion of the local 3749 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3750 3751 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3752 3753 You can call MatGetInfo() to get information on how effective the preallocation was; 3754 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3755 You can also run with the option -info and look for messages with the string 3756 malloc in them to see if additional memory allocation was needed. 3757 3758 Example usage: 3759 3760 Consider the following 8x8 matrix with 34 non-zero values, that is 3761 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3762 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3763 as follows: 3764 3765 .vb 3766 1 2 0 | 0 3 0 | 0 4 3767 Proc0 0 5 6 | 7 0 0 | 8 0 3768 9 0 10 | 11 0 0 | 12 0 3769 ------------------------------------- 3770 13 0 14 | 15 16 17 | 0 0 3771 Proc1 0 18 0 | 19 20 21 | 0 0 3772 0 0 0 | 22 23 0 | 24 0 3773 ------------------------------------- 3774 Proc2 25 26 27 | 0 0 28 | 29 0 3775 30 0 0 | 31 32 33 | 0 34 3776 .ve 3777 3778 This can be represented as a collection of submatrices as: 3779 3780 .vb 3781 A B C 3782 D E F 3783 G H I 3784 .ve 3785 3786 Where the submatrices A,B,C are owned by proc0, D,E,F are 3787 owned by proc1, G,H,I are owned by proc2. 3788 3789 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3790 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3791 The 'M','N' parameters are 8,8, and have the same values on all procs. 3792 3793 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3794 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3795 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
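   A typical call sequence, using the MatCreate()/MatSetType() construction recommended for this routine,
   is sketched below; here comm, the sizes m,n,M,N and the arrays d_nnz,o_nnz are placeholders to be supplied
   by the application, and error handling is abbreviated:
.vb
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve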
3796 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3797 part as SeqAIJ matrices. e.g., proc1 will store [E] as a SeqAIJ 3798 matrix, and [DF] as another SeqAIJ matrix. 3799 3800 When d_nz, o_nz parameters are specified, d_nz storage elements are 3801 allocated for every row of the local diagonal submatrix, and o_nz 3802 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3803 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3804 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3805 In this case, the values of d_nz,o_nz are: 3806 .vb 3807 proc0 : d_nz = 2, o_nz = 2 3808 proc1 : d_nz = 3, o_nz = 2 3809 proc2 : d_nz = 1, o_nz = 4 3810 .ve 3811 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3812 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3813 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3814 34 values. 3815 3816 When d_nnz, o_nnz parameters are specified, the storage is specified 3817 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3818 In the above case the values for d_nnz,o_nnz are: 3819 .vb 3820 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3821 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3822 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3823 .ve 3824 Here the space allocated is the sum of all the above values, i.e. 34, and 3825 hence pre-allocation is perfect. 3826 3827 Level: intermediate 3828 3829 .keywords: matrix, aij, compressed row, sparse, parallel 3830 3831 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3832 MPIAIJ, MatGetInfo() 3833 @*/ 3834 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3835 { 3836 PetscErrorCode ierr; 3837 3838 PetscFunctionBegin; 3839 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3840 PetscFunctionReturn(0); 3841 } 3842 3843 #undef __FUNCT__ 3844 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3845 /*@ 3846 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 3847 CSR format. 3848 3849 Collective on MPI_Comm 3850 3851 Input Parameters: 3852 + comm - MPI communicator 3853 . m - number of local rows (Cannot be PETSC_DECIDE) 3854 . n - This value should be the same as the local size used in creating the 3855 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3856 calculated if N is given) For square matrices n is almost always m. 3857 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3858 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3859 . i - row indices 3860 . j - column indices 3861 - a - matrix values 3862 3863 Output Parameter: 3864 . mat - the matrix 3865 3866 Level: intermediate 3867 3868 Notes: 3869 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3870 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3871 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3872 3873 The i and j indices are 0 based, and the i indices are offsets into the local j array. 3874 3875 The format used for the sparse matrix input is equivalent to a 3876 row-major ordering,
i.e for the following matrix, the input data expected is 3877 as shown: 3878 3879 1 0 0 3880 2 0 3 P0 3881 ------- 3882 4 5 6 P1 3883 3884 Process0 [P0]: rows_owned=[0,1] 3885 i = {0,1,3} [size = nrow+1 = 2+1] 3886 j = {0,0,2} [size = nz = 6] 3887 v = {1,2,3} [size = nz = 6] 3888 3889 Process1 [P1]: rows_owned=[2] 3890 i = {0,3} [size = nrow+1 = 1+1] 3891 j = {0,1,2} [size = nz = 6] 3892 v = {4,5,6} [size = nz = 6] 3893 3894 .keywords: matrix, aij, compressed row, sparse, parallel 3895 3896 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3897 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3898 @*/ 3899 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3900 { 3901 PetscErrorCode ierr; 3902 3903 PetscFunctionBegin; 3904 if (i[0]) { 3905 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3906 } 3907 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3908 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3909 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3910 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3911 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3912 PetscFunctionReturn(0); 3913 } 3914 3915 #undef __FUNCT__ 3916 #define __FUNCT__ "MatCreateMPIAIJ" 3917 /*@C 3918 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3919 (the default parallel PETSc format). For good matrix assembly performance 3920 the user should preallocate the matrix storage by setting the parameters 3921 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3922 performance can be increased by more than a factor of 50. 3923 3924 Collective on MPI_Comm 3925 3926 Input Parameters: 3927 + comm - MPI communicator 3928 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3929 This value should be the same as the local size used in creating the 3930 y vector for the matrix-vector product y = Ax. 3931 . n - This value should be the same as the local size used in creating the 3932 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3933 calculated if N is given) For square matrices n is almost always m. 3934 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3935 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3936 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3937 (same value is used for all local rows) 3938 . d_nnz - array containing the number of nonzeros in the various rows of the 3939 DIAGONAL portion of the local submatrix (possibly different for each row) 3940 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3941 The size of this array is equal to the number of local rows, i.e 'm'. 3942 You must leave room for the diagonal entry even if it is zero. 3943 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3944 submatrix (same value is used for all local rows). 3945 - o_nnz - array containing the number of nonzeros in the various rows of the 3946 OFF-DIAGONAL portion of the local submatrix (possibly different for 3947 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3948 structure. The size of this array is equal to the number 3949 of local rows, i.e 'm'. 
3950 3951 Output Parameter: 3952 . A - the matrix 3953 3954 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3955 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3956 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3957 3958 Notes: 3959 If the *_nnz parameter is given then the *_nz parameter is ignored 3960 3961 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3962 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3963 storage requirements for this matrix. 3964 3965 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3966 processor then it must be used on all processors that share the object for 3967 that argument. 3968 3969 The user MUST specify either the local or global matrix dimensions 3970 (possibly both). 3971 3972 The parallel matrix is partitioned across processors such that the 3973 first m0 rows belong to process 0, the next m1 rows belong to 3974 process 1, the next m2 rows belong to process 2 etc., where 3975 m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores 3976 values corresponding to an [m x N] submatrix. 3977 3978 The columns are logically partitioned with the n0 columns belonging 3979 to the 0th partition, the next n1 columns belonging to the next 3980 partition etc., where n0,n1,n2... are the input parameter 'n'. 3981 3982 The DIAGONAL portion of the local submatrix on any given processor 3983 is the submatrix corresponding to the rows and columns m,n 3984 corresponding to the given processor, i.e. the diagonal matrix on 3985 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1] 3986 etc. The remaining portion of the local submatrix [m x (N-n)] 3987 constitutes the OFF-DIAGONAL portion. The example below better 3988 illustrates this concept. 3989 3990 For a square global matrix we define each processor's diagonal portion 3991 to be its local rows and the corresponding columns (a square submatrix); 3992 each processor's off-diagonal portion encompasses the remainder of the 3993 local matrix (a rectangular submatrix). 3994 3995 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3996 3997 When calling this routine with a single process communicator, a matrix of 3998 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3999 type of communicator, use the construction mechanism: 4000 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4001 4002 By default, this format uses inodes (identical nodes) when possible. 4003 We search for consecutive rows with the same nonzero structure, thereby 4004 reusing matrix information to achieve increased efficiency. 4005 4006 Options Database Keys: 4007 + -mat_no_inode - Do not use inodes 4008 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4009 - -mat_aij_oneindex - Internally use indexing starting at 1 4010 rather than 0. Note that when calling MatSetValues(), 4011 the user still MUST index entries starting at 0! 4012 4013 4014 Example usage: 4015 4016 Consider the following 8x8 matrix with 34 non-zero values, that is 4017 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 4018 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 4019 as follows: 4020 4021 .vb 4022 1 2 0 | 0 3 0 | 0 4 4023 Proc0 0 5 6 | 7 0 0 | 8 0 4024 9 0 10 | 11 0 0 | 12 0 4025 ------------------------------------- 4026 13 0 14 | 15 16 17 | 0 0 4027 Proc1 0 18 0 | 19 20 21 | 0 0 4028 0 0 0 | 22 23 0 | 24 0 4029 ------------------------------------- 4030 Proc2 25 26 27 | 0 0 28 | 29 0 4031 30 0 0 | 31 32 33 | 0 34 4032 .ve 4033 4034 This can be represented as a collection of submatrices as: 4035 4036 .vb 4037 A B C 4038 D E F 4039 G H I 4040 .ve 4041 4042 Where the submatrices A,B,C are owned by proc0, D,E,F are 4043 owned by proc1, G,H,I are owned by proc2. 4044 4045 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4046 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4047 The 'M','N' parameters are 8,8, and have the same values on all procs. 4048 4049 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4050 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4051 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4052 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4053 part as SeqAIJ matrices. e.g., proc1 will store [E] as a SeqAIJ 4054 matrix, and [DF] as another SeqAIJ matrix. 4055 4056 When d_nz, o_nz parameters are specified, d_nz storage elements are 4057 allocated for every row of the local diagonal submatrix, and o_nz 4058 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4059 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4060 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4061 In this case, the values of d_nz,o_nz are: 4062 .vb 4063 proc0 : d_nz = 2, o_nz = 2 4064 proc1 : d_nz = 3, o_nz = 2 4065 proc2 : d_nz = 1, o_nz = 4 4066 .ve 4067 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4068 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4069 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4070 34 values. 4071 4072 When d_nnz, o_nnz parameters are specified, the storage is specified 4073 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4074 In the above case the values for d_nnz,o_nnz are: 4075 .vb 4076 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4077 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4078 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4079 .ve 4080 Here the space allocated is the sum of all the above values, i.e. 34, and 4081 hence pre-allocation is perfect.
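   For instance, with the d_nnz/o_nnz values worked out above, the call on proc0 could be sketched as
   (illustrative only; the three processes are assumed to live on PETSC_COMM_WORLD and error handling is
   abbreviated):
.vb
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
      Mat      A;
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve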
4082 4083 Level: intermediate 4084 4085 .keywords: matrix, aij, compressed row, sparse, parallel 4086 4087 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4088 MPIAIJ, MatCreateMPIAIJWithArrays() 4089 @*/ 4090 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4091 { 4092 PetscErrorCode ierr; 4093 PetscMPIInt size; 4094 4095 PetscFunctionBegin; 4096 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4097 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4098 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4099 if (size > 1) { 4100 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4101 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4102 } else { 4103 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4104 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4105 } 4106 PetscFunctionReturn(0); 4107 } 4108 4109 #undef __FUNCT__ 4110 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4111 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4112 { 4113 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4114 4115 PetscFunctionBegin; 4116 *Ad = a->A; 4117 *Ao = a->B; 4118 *colmap = a->garray; 4119 PetscFunctionReturn(0); 4120 } 4121 4122 #undef __FUNCT__ 4123 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4124 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4125 { 4126 PetscErrorCode ierr; 4127 PetscInt i; 4128 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4129 4130 PetscFunctionBegin; 4131 if (coloring->ctype == IS_COLORING_GLOBAL) { 4132 ISColoringValue *allcolors,*colors; 4133 ISColoring ocoloring; 4134 4135 /* set coloring for diagonal portion */ 4136 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4137 4138 /* set coloring for off-diagonal portion */ 4139 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4140 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4141 for (i=0; i<a->B->cmap->n; i++) { 4142 colors[i] = allcolors[a->garray[i]]; 4143 } 4144 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4145 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4146 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4147 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4148 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4149 ISColoringValue *colors; 4150 PetscInt *larray; 4151 ISColoring ocoloring; 4152 4153 /* set coloring for diagonal portion */ 4154 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4155 for (i=0; i<a->A->cmap->n; i++) { 4156 larray[i] = i + A->cmap->rstart; 4157 } 4158 ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4159 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4160 for (i=0; i<a->A->cmap->n; i++) { 4161 colors[i] = coloring->colors[larray[i]]; 4162 } 4163 ierr = PetscFree(larray);CHKERRQ(ierr); 4164 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4165 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4166 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4167 4168 /* set coloring for off-diagonal portion */ 4169 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4170 ierr = 
ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4171 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4172 for (i=0; i<a->B->cmap->n; i++) { 4173 colors[i] = coloring->colors[larray[i]]; 4174 } 4175 ierr = PetscFree(larray);CHKERRQ(ierr); 4176 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4177 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4178 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4179 } else { 4180 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype); 4181 } 4182 4183 PetscFunctionReturn(0); 4184 } 4185 4186 #if defined(PETSC_HAVE_ADIC) 4187 #undef __FUNCT__ 4188 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4189 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4190 { 4191 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4192 PetscErrorCode ierr; 4193 4194 PetscFunctionBegin; 4195 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4196 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4197 PetscFunctionReturn(0); 4198 } 4199 #endif 4200 4201 #undef __FUNCT__ 4202 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4203 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4204 { 4205 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4206 PetscErrorCode ierr; 4207 4208 PetscFunctionBegin; 4209 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4210 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4211 PetscFunctionReturn(0); 4212 } 4213 4214 #undef __FUNCT__ 4215 #define __FUNCT__ "MatMerge" 4216 /*@ 4217 MatMerge - Creates a single large PETSc matrix by concatenating sequential 4218 matrices from each processor 4219 4220 Collective on MPI_Comm 4221 4222 Input Parameters: 4223 + comm - the communicator the parallel matrix will live on 4224 . inmat - the input sequential matrix on each process 4225 . n - number of local columns (or PETSC_DECIDE) 4226 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4227 4228 Output Parameter: 4229 . outmat - the parallel matrix generated 4230 4231 Level: advanced 4232 4233 Notes: The number of columns of the matrix in EACH processor MUST be the same.
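   Example usage (a sketch only; note, as the MatDestroy() call in the implementation below shows, that the
   input sequential matrix is destroyed by this routine):
.vb
      Mat seqA;     /* a MATSEQAIJ matrix assembled independently on each process */
      Mat mpiA;
      ierr = MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
.ve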
4234 4235 @*/ 4236 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4237 { 4238 PetscErrorCode ierr; 4239 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 4240 PetscInt *indx; 4241 PetscScalar *values; 4242 4243 PetscFunctionBegin; 4244 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4245 if (scall == MAT_INITIAL_MATRIX){ 4246 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 4247 if (n == PETSC_DECIDE){ 4248 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4249 } 4250 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4251 rstart -= m; 4252 4253 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4254 for (i=0;i<m;i++) { 4255 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4256 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4257 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4258 } 4259 /* This routine will ONLY return MPIAIJ type matrix */ 4260 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4261 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4262 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4263 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4264 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4265 4266 } else if (scall == MAT_REUSE_MATRIX){ 4267 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4268 } else { 4269 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4270 } 4271 4272 for (i=0;i<m;i++) { 4273 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4274 Ii = i + rstart; 4275 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4276 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4277 } 4278 ierr = MatDestroy(inmat);CHKERRQ(ierr); 4279 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4280 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4281 4282 PetscFunctionReturn(0); 4283 } 4284 4285 #undef __FUNCT__ 4286 #define __FUNCT__ "MatFileSplit" 4287 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4288 { 4289 PetscErrorCode ierr; 4290 PetscMPIInt rank; 4291 PetscInt m,N,i,rstart,nnz; 4292 size_t len; 4293 const PetscInt *indx; 4294 PetscViewer out; 4295 char *name; 4296 Mat B; 4297 const PetscScalar *values; 4298 4299 PetscFunctionBegin; 4300 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4301 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4302 /* Should this be the type of the diagonal block of A? 
*/ 4303 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4304 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4305 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4306 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4307 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4308 for (i=0;i<m;i++) { 4309 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4310 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4311 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4312 } 4313 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4314 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4315 4316 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4317 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4318 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4319 sprintf(name,"%s.%d",outfile,rank); 4320 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4321 ierr = PetscFree(name); 4322 ierr = MatView(B,out);CHKERRQ(ierr); 4323 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 4324 ierr = MatDestroy(B);CHKERRQ(ierr); 4325 PetscFunctionReturn(0); 4326 } 4327 4328 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4329 #undef __FUNCT__ 4330 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4331 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4332 { 4333 PetscErrorCode ierr; 4334 Mat_Merge_SeqsToMPI *merge; 4335 PetscContainer container; 4336 4337 PetscFunctionBegin; 4338 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4339 if (container) { 4340 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4341 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4342 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4343 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4344 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4345 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4346 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4347 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4348 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4349 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4350 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4351 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4352 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4353 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4354 4355 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4356 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4357 } 4358 ierr = PetscFree(merge);CHKERRQ(ierr); 4359 4360 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4361 PetscFunctionReturn(0); 4362 } 4363 4364 #include "../src/mat/utils/freespace.h" 4365 #include "petscbt.h" 4366 4367 #undef __FUNCT__ 4368 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4369 /*@C 4370 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4371 matrices from each processor 4372 4373 Collective on MPI_Comm 4374 4375 Input Parameters: 4376 + comm - the communicators the parallel matrix will live on 4377 . seqmat - the input sequential matrices 4378 . m - number of local rows (or PETSC_DECIDE) 4379 . n - number of local columns (or PETSC_DECIDE) 4380 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4381 4382 Output Parameter: 4383 . mpimat - the parallel matrix generated 4384 4385 Level: advanced 4386 4387 Notes: 4388 The dimensions of the sequential matrix in each processor MUST be the same. 
4389 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4390 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4391 @*/ 4392 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4393 { 4394 PetscErrorCode ierr; 4395 MPI_Comm comm=((PetscObject)mpimat)->comm; 4396 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4397 PetscMPIInt size,rank,taga,*len_s; 4398 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4399 PetscInt proc,m; 4400 PetscInt **buf_ri,**buf_rj; 4401 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4402 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4403 MPI_Request *s_waits,*r_waits; 4404 MPI_Status *status; 4405 MatScalar *aa=a->a; 4406 MatScalar **abuf_r,*ba_i; 4407 Mat_Merge_SeqsToMPI *merge; 4408 PetscContainer container; 4409 4410 PetscFunctionBegin; 4411 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4412 4413 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4414 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4415 4416 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4417 if (container) { 4418 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4419 } 4420 bi = merge->bi; 4421 bj = merge->bj; 4422 buf_ri = merge->buf_ri; 4423 buf_rj = merge->buf_rj; 4424 4425 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4426 owners = merge->rowmap->range; 4427 len_s = merge->len_s; 4428 4429 /* send and recv matrix values */ 4430 /*-----------------------------*/ 4431 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4432 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4433 4434 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4435 for (proc=0,k=0; proc<size; proc++){ 4436 if (!len_s[proc]) continue; 4437 i = owners[proc]; 4438 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4439 k++; 4440 } 4441 4442 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4443 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4444 ierr = PetscFree(status);CHKERRQ(ierr); 4445 4446 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4447 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4448 4449 /* insert mat values of mpimat */ 4450 /*----------------------------*/ 4451 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4452 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4453 4454 for (k=0; k<merge->nrecv; k++){ 4455 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4456 nrows = *(buf_ri_k[k]); 4457 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4458 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4459 } 4460 4461 /* set values of ba */ 4462 m = merge->rowmap->n; 4463 for (i=0; i<m; i++) { 4464 arow = owners[rank] + i; 4465 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4466 bnzi = bi[i+1] - bi[i]; 4467 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4468 4469 /* add local non-zero vals of this proc's seqmat into ba */ 4470 anzi = ai[arow+1] - ai[arow]; 4471 aj = a->j + ai[arow]; 4472 aa = a->a + ai[arow]; 4473 nextaj = 0; 4474 for (j=0; nextaj<anzi; j++){ 4475 if (*(bj_i + j) == 
aj[nextaj]){ /* bcol == acol */ 4476 ba_i[j] += aa[nextaj++]; 4477 } 4478 } 4479 4480 /* add received vals into ba */ 4481 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4482 /* i-th row */ 4483 if (i == *nextrow[k]) { 4484 anzi = *(nextai[k]+1) - *nextai[k]; 4485 aj = buf_rj[k] + *(nextai[k]); 4486 aa = abuf_r[k] + *(nextai[k]); 4487 nextaj = 0; 4488 for (j=0; nextaj<anzi; j++){ 4489 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4490 ba_i[j] += aa[nextaj++]; 4491 } 4492 } 4493 nextrow[k]++; nextai[k]++; 4494 } 4495 } 4496 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4497 } 4498 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4499 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4500 4501 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4502 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4503 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4504 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4505 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4506 PetscFunctionReturn(0); 4507 } 4508 4509 #undef __FUNCT__ 4510 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4511 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4512 { 4513 PetscErrorCode ierr; 4514 Mat B_mpi; 4515 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4516 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4517 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4518 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4519 PetscInt len,proc,*dnz,*onz; 4520 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4521 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4522 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4523 MPI_Status *status; 4524 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4525 PetscBT lnkbt; 4526 Mat_Merge_SeqsToMPI *merge; 4527 PetscContainer container; 4528 4529 PetscFunctionBegin; 4530 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4531 4532 /* make sure it is a PETSc comm */ 4533 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4534 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4535 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4536 4537 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4538 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4539 4540 /* determine row ownership */ 4541 /*---------------------------------------------------------*/ 4542 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4543 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4544 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4545 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4546 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4547 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4548 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4549 4550 m = merge->rowmap->n; 4551 M = merge->rowmap->N; 4552 owners = merge->rowmap->range; 4553 4554 /* determine the number of messages to send, their lengths */ 4555 /*---------------------------------------------------------*/ 4556 len_s = merge->len_s; 4557 4558 len = 0; /* length of buf_si[] */ 4559 merge->nsend = 0; 4560 for (proc=0; proc<size; proc++){ 4561 len_si[proc] = 0; 4562 if (proc == rank){ 4563 len_s[proc] = 0; 4564 } else { 4565 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4566 len_s[proc] = ai[owners[proc+1]] - 
ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4567 } 4568 if (len_s[proc]) { 4569 merge->nsend++; 4570 nrows = 0; 4571 for (i=owners[proc]; i<owners[proc+1]; i++){ 4572 if (ai[i+1] > ai[i]) nrows++; 4573 } 4574 len_si[proc] = 2*(nrows+1); 4575 len += len_si[proc]; 4576 } 4577 } 4578 4579 /* determine the number and length of messages to receive for ij-structure */ 4580 /*-------------------------------------------------------------------------*/ 4581 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4582 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4583 4584 /* post the Irecv of j-structure */ 4585 /*-------------------------------*/ 4586 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4587 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4588 4589 /* post the Isend of j-structure */ 4590 /*--------------------------------*/ 4591 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4592 4593 for (proc=0, k=0; proc<size; proc++){ 4594 if (!len_s[proc]) continue; 4595 i = owners[proc]; 4596 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4597 k++; 4598 } 4599 4600 /* receives and sends of j-structure are complete */ 4601 /*------------------------------------------------*/ 4602 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4603 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4604 4605 /* send and recv i-structure */ 4606 /*---------------------------*/ 4607 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4608 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4609 4610 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4611 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4612 for (proc=0,k=0; proc<size; proc++){ 4613 if (!len_s[proc]) continue; 4614 /* form outgoing message for i-structure: 4615 buf_si[0]: nrows to be sent 4616 [1:nrows]: row index (global) 4617 [nrows+1:2*nrows+1]: i-structure index 4618 */ 4619 /*-------------------------------------------*/ 4620 nrows = len_si[proc]/2 - 1; 4621 buf_si_i = buf_si + nrows+1; 4622 buf_si[0] = nrows; 4623 buf_si_i[0] = 0; 4624 nrows = 0; 4625 for (i=owners[proc]; i<owners[proc+1]; i++){ 4626 anzi = ai[i+1] - ai[i]; 4627 if (anzi) { 4628 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4629 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4630 nrows++; 4631 } 4632 } 4633 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4634 k++; 4635 buf_si += len_si[proc]; 4636 } 4637 4638 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4639 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4640 4641 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4642 for (i=0; i<merge->nrecv; i++){ 4643 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4644 } 4645 4646 ierr = PetscFree(len_si);CHKERRQ(ierr); 4647 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4648 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4649 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4650 ierr = 
PetscFree(ri_waits);CHKERRQ(ierr); 4651 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4652 ierr = PetscFree(status);CHKERRQ(ierr); 4653 4654 /* compute a local seq matrix in each processor */ 4655 /*----------------------------------------------*/ 4656 /* allocate bi array and free space for accumulating nonzero column info */ 4657 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4658 bi[0] = 0; 4659 4660 /* create and initialize a linked list */ 4661 nlnk = N+1; 4662 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4663 4664 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4665 len = 0; 4666 len = ai[owners[rank+1]] - ai[owners[rank]]; 4667 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4668 current_space = free_space; 4669 4670 /* determine symbolic info for each local row */ 4671 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4672 4673 for (k=0; k<merge->nrecv; k++){ 4674 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4675 nrows = *buf_ri_k[k]; 4676 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4677 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4678 } 4679 4680 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4681 len = 0; 4682 for (i=0;i<m;i++) { 4683 bnzi = 0; 4684 /* add local non-zero cols of this proc's seqmat into lnk */ 4685 arow = owners[rank] + i; 4686 anzi = ai[arow+1] - ai[arow]; 4687 aj = a->j + ai[arow]; 4688 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4689 bnzi += nlnk; 4690 /* add received col data into lnk */ 4691 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4692 if (i == *nextrow[k]) { /* i-th row */ 4693 anzi = *(nextai[k]+1) - *nextai[k]; 4694 aj = buf_rj[k] + *nextai[k]; 4695 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4696 bnzi += nlnk; 4697 nextrow[k]++; nextai[k]++; 4698 } 4699 } 4700 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4701 4702 /* if free space is not available, make more free space */ 4703 if (current_space->local_remaining<bnzi) { 4704 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4705 nspacedouble++; 4706 } 4707 /* copy data into free space, then initialize lnk */ 4708 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4709 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4710 4711 current_space->array += bnzi; 4712 current_space->local_used += bnzi; 4713 current_space->local_remaining -= bnzi; 4714 4715 bi[i+1] = bi[i] + bnzi; 4716 } 4717 4718 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4719 4720 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4721 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4722 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4723 4724 /* create symbolic parallel matrix B_mpi */ 4725 /*---------------------------------------*/ 4726 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4727 if (n==PETSC_DECIDE) { 4728 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4729 } else { 4730 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4731 } 4732 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4733 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4734 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4735 4736 /* B_mpi is 
not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4737 B_mpi->assembled = PETSC_FALSE; 4738 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4739 merge->bi = bi; 4740 merge->bj = bj; 4741 merge->buf_ri = buf_ri; 4742 merge->buf_rj = buf_rj; 4743 merge->coi = PETSC_NULL; 4744 merge->coj = PETSC_NULL; 4745 merge->owners_co = PETSC_NULL; 4746 4747 /* attach the supporting struct to B_mpi for reuse */ 4748 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4749 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4750 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4751 *mpimat = B_mpi; 4752 4753 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4754 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4755 PetscFunctionReturn(0); 4756 } 4757 4758 #undef __FUNCT__ 4759 #define __FUNCT__ "MatMerge_SeqsToMPI" 4760 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4761 { 4762 PetscErrorCode ierr; 4763 4764 PetscFunctionBegin; 4765 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4766 if (scall == MAT_INITIAL_MATRIX){ 4767 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4768 } 4769 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4770 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4771 PetscFunctionReturn(0); 4772 } 4773 4774 #undef __FUNCT__ 4775 #define __FUNCT__ "MatGetLocalMat" 4776 /*@ 4777 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4778 4779 Not Collective 4780 4781 Input Parameters: 4782 + A - the matrix 4783 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4784 4785 Output Parameter: 4786 . 
A_loc - the local sequential matrix generated 4787 4788 Level: developer 4789 4790 @*/ 4791 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4792 { 4793 PetscErrorCode ierr; 4794 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4795 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4796 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4797 MatScalar *aa=a->a,*ba=b->a,*cam; 4798 PetscScalar *ca; 4799 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4800 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4801 4802 PetscFunctionBegin; 4803 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4804 if (scall == MAT_INITIAL_MATRIX){ 4805 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4806 ci[0] = 0; 4807 for (i=0; i<am; i++){ 4808 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4809 } 4810 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4811 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4812 k = 0; 4813 for (i=0; i<am; i++) { 4814 ncols_o = bi[i+1] - bi[i]; 4815 ncols_d = ai[i+1] - ai[i]; 4816 /* off-diagonal portion of A */ 4817 for (jo=0; jo<ncols_o; jo++) { 4818 col = cmap[*bj]; 4819 if (col >= cstart) break; 4820 cj[k] = col; bj++; 4821 ca[k++] = *ba++; 4822 } 4823 /* diagonal portion of A */ 4824 for (j=0; j<ncols_d; j++) { 4825 cj[k] = cstart + *aj++; 4826 ca[k++] = *aa++; 4827 } 4828 /* off-diagonal portion of A */ 4829 for (j=jo; j<ncols_o; j++) { 4830 cj[k] = cmap[*bj++]; 4831 ca[k++] = *ba++; 4832 } 4833 } 4834 /* put together the new matrix */ 4835 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4836 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4837 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4838 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4839 mat->free_a = PETSC_TRUE; 4840 mat->free_ij = PETSC_TRUE; 4841 mat->nonew = 0; 4842 } else if (scall == MAT_REUSE_MATRIX){ 4843 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4844 ci = mat->i; cj = mat->j; cam = mat->a; 4845 for (i=0; i<am; i++) { 4846 /* off-diagonal portion of A */ 4847 ncols_o = bi[i+1] - bi[i]; 4848 for (jo=0; jo<ncols_o; jo++) { 4849 col = cmap[*bj]; 4850 if (col >= cstart) break; 4851 *cam++ = *ba++; bj++; 4852 } 4853 /* diagonal portion of A */ 4854 ncols_d = ai[i+1] - ai[i]; 4855 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4856 /* off-diagonal portion of A */ 4857 for (j=jo; j<ncols_o; j++) { 4858 *cam++ = *ba++; bj++; 4859 } 4860 } 4861 } else { 4862 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4863 } 4864 4865 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4866 PetscFunctionReturn(0); 4867 } 4868 4869 #undef __FUNCT__ 4870 #define __FUNCT__ "MatGetLocalMatCondensed" 4871 /*@C 4872 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4873 4874 Not Collective 4875 4876 Input Parameters: 4877 + A - the matrix 4878 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4879 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4880 4881 Output Parameter: 4882 . 
A_loc - the local sequential matrix generated 4883 4884 Level: developer 4885 4886 @*/ 4887 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4888 { 4889 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4890 PetscErrorCode ierr; 4891 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4892 IS isrowa,iscola; 4893 Mat *aloc; 4894 4895 PetscFunctionBegin; 4896 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4897 if (!row){ 4898 start = A->rmap->rstart; end = A->rmap->rend; 4899 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4900 } else { 4901 isrowa = *row; 4902 } 4903 if (!col){ 4904 start = A->cmap->rstart; 4905 cmap = a->garray; 4906 nzA = a->A->cmap->n; 4907 nzB = a->B->cmap->n; 4908 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4909 ncols = 0; 4910 for (i=0; i<nzB; i++) { 4911 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4912 else break; 4913 } 4914 imark = i; 4915 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4916 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4917 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4918 } else { 4919 iscola = *col; 4920 } 4921 if (scall != MAT_INITIAL_MATRIX){ 4922 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4923 aloc[0] = *A_loc; 4924 } 4925 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4926 *A_loc = aloc[0]; 4927 ierr = PetscFree(aloc);CHKERRQ(ierr); 4928 if (!row){ 4929 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4930 } 4931 if (!col){ 4932 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4933 } 4934 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4935 PetscFunctionReturn(0); 4936 } 4937 4938 #undef __FUNCT__ 4939 #define __FUNCT__ "MatGetBrowsOfAcols" 4940 /*@C 4941 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4942 4943 Collective on Mat 4944 4945 Input Parameters: 4946 + A,B - the matrices in mpiaij format 4947 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4948 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4949 4950 Output Parameter: 4951 + rowb, colb - index sets of rows and columns of B to extract 4952 . 
brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4953 - B_seq - the sequential matrix generated 4954 4955 Level: developer 4956 4957 @*/ 4958 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4959 { 4960 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4961 PetscErrorCode ierr; 4962 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4963 IS isrowb,iscolb; 4964 Mat *bseq; 4965 4966 PetscFunctionBegin; 4967 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4968 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4969 } 4970 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4971 4972 if (scall == MAT_INITIAL_MATRIX){ 4973 start = A->cmap->rstart; 4974 cmap = a->garray; 4975 nzA = a->A->cmap->n; 4976 nzB = a->B->cmap->n; 4977 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4978 ncols = 0; 4979 for (i=0; i<nzB; i++) { /* row < local row index */ 4980 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4981 else break; 4982 } 4983 imark = i; 4984 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4985 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4986 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4987 *brstart = imark; 4988 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4989 } else { 4990 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4991 isrowb = *rowb; iscolb = *colb; 4992 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4993 bseq[0] = *B_seq; 4994 } 4995 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4996 *B_seq = bseq[0]; 4997 ierr = PetscFree(bseq);CHKERRQ(ierr); 4998 if (!rowb){ 4999 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 5000 } else { 5001 *rowb = isrowb; 5002 } 5003 if (!colb){ 5004 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 5005 } else { 5006 *colb = iscolb; 5007 } 5008 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5009 PetscFunctionReturn(0); 5010 } 5011 5012 #undef __FUNCT__ 5013 #define __FUNCT__ "MatGetBrowsOfAoCols" 5014 /*@C 5015 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5016 of the OFF-DIAGONAL portion of local A 5017 5018 Collective on Mat 5019 5020 Input Parameters: 5021 + A,B - the matrices in mpiaij format 5022 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5023 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5024 . 
startsj_r - similar to startsj for receives 5025 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5026 5027 Output Parameter: 5028 + B_oth - the sequential matrix generated 5029 5030 Level: developer 5031 5032 @*/ 5033 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5034 { 5035 VecScatter_MPI_General *gen_to,*gen_from; 5036 PetscErrorCode ierr; 5037 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5038 Mat_SeqAIJ *b_oth; 5039 VecScatter ctx=a->Mvctx; 5040 MPI_Comm comm=((PetscObject)ctx)->comm; 5041 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5042 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5043 PetscScalar *rvalues,*svalues; 5044 MatScalar *b_otha,*bufa,*bufA; 5045 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5046 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5047 MPI_Status *sstatus,rstatus; 5048 PetscMPIInt jj; 5049 PetscInt *cols,sbs,rbs; 5050 PetscScalar *vals; 5051 5052 PetscFunctionBegin; 5053 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5054 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5055 } 5056 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5057 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5058 5059 gen_to = (VecScatter_MPI_General*)ctx->todata; 5060 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5061 rvalues = gen_from->values; /* holds the length of receiving row */ 5062 svalues = gen_to->values; /* holds the length of sending row */ 5063 nrecvs = gen_from->n; 5064 nsends = gen_to->n; 5065 5066 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5067 srow = gen_to->indices; /* local row index to be sent */ 5068 sstarts = gen_to->starts; 5069 sprocs = gen_to->procs; 5070 sstatus = gen_to->sstatus; 5071 sbs = gen_to->bs; 5072 rstarts = gen_from->starts; 5073 rprocs = gen_from->procs; 5074 rbs = gen_from->bs; 5075 5076 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5077 if (scall == MAT_INITIAL_MATRIX){ 5078 /* i-array */ 5079 /*---------*/ 5080 /* post receives */ 5081 for (i=0; i<nrecvs; i++){ 5082 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5083 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5084 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5085 } 5086 5087 /* pack the outgoing message */ 5088 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5089 sstartsj[0] = 0; rstartsj[0] = 0; 5090 len = 0; /* total length of j or a array to be sent */ 5091 k = 0; 5092 for (i=0; i<nsends; i++){ 5093 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5094 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5095 for (j=0; j<nrows; j++) { 5096 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5097 for (l=0; l<sbs; l++){ 5098 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5099 rowlen[j*sbs+l] = ncols; 5100 len += ncols; 5101 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5102 } 5103 k++; 5104 } 5105 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5106 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and 
bufa */ 5107 } 5108 /* recvs and sends of i-array are completed */ 5109 i = nrecvs; 5110 while (i--) { 5111 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5112 } 5113 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5114 5115 /* allocate buffers for sending j and a arrays */ 5116 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5117 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5118 5119 /* create i-array of B_oth */ 5120 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5121 b_othi[0] = 0; 5122 len = 0; /* total length of j or a array to be received */ 5123 k = 0; 5124 for (i=0; i<nrecvs; i++){ 5125 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5126 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5127 for (j=0; j<nrows; j++) { 5128 b_othi[k+1] = b_othi[k] + rowlen[j]; 5129 len += rowlen[j]; k++; 5130 } 5131 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5132 } 5133 5134 /* allocate space for j and a arrrays of B_oth */ 5135 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5136 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5137 5138 /* j-array */ 5139 /*---------*/ 5140 /* post receives of j-array */ 5141 for (i=0; i<nrecvs; i++){ 5142 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5143 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5144 } 5145 5146 /* pack the outgoing message j-array */ 5147 k = 0; 5148 for (i=0; i<nsends; i++){ 5149 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5150 bufJ = bufj+sstartsj[i]; 5151 for (j=0; j<nrows; j++) { 5152 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5153 for (ll=0; ll<sbs; ll++){ 5154 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5155 for (l=0; l<ncols; l++){ 5156 *bufJ++ = cols[l]; 5157 } 5158 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5159 } 5160 } 5161 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5162 } 5163 5164 /* recvs and sends of j-array are completed */ 5165 i = nrecvs; 5166 while (i--) { 5167 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5168 } 5169 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5170 } else if (scall == MAT_REUSE_MATRIX){ 5171 sstartsj = *startsj; 5172 rstartsj = *startsj_r; 5173 bufa = *bufa_ptr; 5174 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5175 b_otha = b_oth->a; 5176 } else { 5177 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5178 } 5179 5180 /* a-array */ 5181 /*---------*/ 5182 /* post receives of a-array */ 5183 for (i=0; i<nrecvs; i++){ 5184 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5185 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5186 } 5187 5188 /* pack the outgoing message a-array */ 5189 k = 0; 5190 for (i=0; i<nsends; i++){ 5191 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5192 bufA = bufa+sstartsj[i]; 5193 for (j=0; j<nrows; j++) { 5194 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5195 for (ll=0; ll<sbs; ll++){ 5196 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5197 for (l=0; l<ncols; l++){ 5198 *bufA++ = vals[l]; 5199 } 5200 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5201 } 5202 } 5203 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5204 } 5205 /* recvs and sends of a-array are completed */ 5206 i = nrecvs; 5207 while (i--) { 5208 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5209 } 5210 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5211 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5212 5213 if (scall == MAT_INITIAL_MATRIX){ 5214 /* put together the new matrix */ 5215 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5216 5217 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5218 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5219 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5220 b_oth->free_a = PETSC_TRUE; 5221 b_oth->free_ij = PETSC_TRUE; 5222 b_oth->nonew = 0; 5223 5224 ierr = PetscFree(bufj);CHKERRQ(ierr); 5225 if (!startsj || !bufa_ptr){ 5226 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5227 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5228 } else { 5229 *startsj = sstartsj; 5230 *startsj_r = rstartsj; 5231 *bufa_ptr = bufa; 5232 } 5233 } 5234 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5235 PetscFunctionReturn(0); 5236 } 5237 5238 #undef __FUNCT__ 5239 #define __FUNCT__ "MatGetCommunicationStructs" 5240 /*@C 5241 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5242 5243 Not Collective 5244 5245 Input Parameters: 5246 . A - The matrix in mpiaij format 5247 5248 Output Parameter: 5249 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5250 . 
colmap - A map from global column index to local index into lvec 5251 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5252 5253 Level: developer 5254 5255 @*/ 5256 #if defined (PETSC_USE_CTABLE) 5257 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5258 #else 5259 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5260 #endif 5261 { 5262 Mat_MPIAIJ *a; 5263 5264 PetscFunctionBegin; 5265 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5266 PetscValidPointer(lvec, 2); 5267 PetscValidPointer(colmap, 3); 5268 PetscValidPointer(multScatter, 4); 5269 a = (Mat_MPIAIJ *) A->data; 5270 if (lvec) *lvec = a->lvec; 5271 if (colmap) *colmap = a->colmap; 5272 if (multScatter) *multScatter = a->Mvctx; 5273 PetscFunctionReturn(0); 5274 } 5275 5276 EXTERN_C_BEGIN 5277 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5278 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5279 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5280 EXTERN_C_END 5281 5282 #undef __FUNCT__ 5283 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5284 /* 5285 Computes (B'*A')' since computing B*A directly is untenable 5286 5287 n p p 5288 ( ) ( ) ( ) 5289 m ( A ) * n ( B ) = m ( C ) 5290 ( ) ( ) ( ) 5291 5292 */ 5293 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5294 { 5295 PetscErrorCode ierr; 5296 Mat At,Bt,Ct; 5297 5298 PetscFunctionBegin; 5299 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5300 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5301 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5302 ierr = MatDestroy(At);CHKERRQ(ierr); 5303 ierr = MatDestroy(Bt);CHKERRQ(ierr); 5304 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5305 ierr = MatDestroy(Ct);CHKERRQ(ierr); 5306 PetscFunctionReturn(0); 5307 } 5308 5309 #undef __FUNCT__ 5310 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5311 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5312 { 5313 PetscErrorCode ierr; 5314 PetscInt m=A->rmap->n,n=B->cmap->n; 5315 Mat Cmat; 5316 5317 PetscFunctionBegin; 5318 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5319 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5320 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5321 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5322 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5323 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5324 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5325 *C = Cmat; 5326 PetscFunctionReturn(0); 5327 } 5328 5329 /* ----------------------------------------------------------------*/ 5330 #undef __FUNCT__ 5331 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5332 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5333 { 5334 PetscErrorCode ierr; 5335 5336 PetscFunctionBegin; 5337 if (scall == MAT_INITIAL_MATRIX){ 5338 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5339 } 5340 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5341 PetscFunctionReturn(0); 5342 } 5343 5344 EXTERN_C_BEGIN 5345 #if defined(PETSC_HAVE_MUMPS) 5346 extern PetscErrorCode 
MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5347 #endif 5348 #if defined(PETSC_HAVE_PASTIX) 5349 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5350 #endif 5351 #if defined(PETSC_HAVE_SUPERLU_DIST) 5352 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5353 #endif 5354 #if defined(PETSC_HAVE_SPOOLES) 5355 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5356 #endif 5357 EXTERN_C_END 5358 5359 /*MC 5360 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5361 5362 Options Database Keys: 5363 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5364 5365 Level: beginner 5366 5367 .seealso: MatCreateMPIAIJ() 5368 M*/ 5369 5370 EXTERN_C_BEGIN 5371 #undef __FUNCT__ 5372 #define __FUNCT__ "MatCreate_MPIAIJ" 5373 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5374 { 5375 Mat_MPIAIJ *b; 5376 PetscErrorCode ierr; 5377 PetscMPIInt size; 5378 5379 PetscFunctionBegin; 5380 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5381 5382 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5383 B->data = (void*)b; 5384 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5385 B->rmap->bs = 1; 5386 B->assembled = PETSC_FALSE; 5387 5388 B->insertmode = NOT_SET_VALUES; 5389 b->size = size; 5390 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5391 5392 /* build cache for off array entries formed */ 5393 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5394 b->donotstash = PETSC_FALSE; 5395 b->colmap = 0; 5396 b->garray = 0; 5397 b->roworiented = PETSC_TRUE; 5398 5399 /* stuff used for matrix vector multiply */ 5400 b->lvec = PETSC_NULL; 5401 b->Mvctx = PETSC_NULL; 5402 5403 /* stuff for MatGetRow() */ 5404 b->rowindices = 0; 5405 b->rowvalues = 0; 5406 b->getrowactive = PETSC_FALSE; 5407 5408 #if defined(PETSC_HAVE_SPOOLES) 5409 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5410 "MatGetFactor_mpiaij_spooles", 5411 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5412 #endif 5413 #if defined(PETSC_HAVE_MUMPS) 5414 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5415 "MatGetFactor_aij_mumps", 5416 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5417 #endif 5418 #if defined(PETSC_HAVE_PASTIX) 5419 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5420 "MatGetFactor_mpiaij_pastix", 5421 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5422 #endif 5423 #if defined(PETSC_HAVE_SUPERLU_DIST) 5424 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5425 "MatGetFactor_mpiaij_superlu_dist", 5426 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5427 #endif 5428 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5429 "MatStoreValues_MPIAIJ", 5430 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5431 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5432 "MatRetrieveValues_MPIAIJ", 5433 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5434 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5435 "MatGetDiagonalBlock_MPIAIJ", 5436 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5437 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5438 "MatIsTranspose_MPIAIJ", 5439 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5440 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5441 "MatMPIAIJSetPreallocation_MPIAIJ", 5442 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5443 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5444 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5445 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5446 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5447 "MatDiagonalScaleLocal_MPIAIJ", 5448 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5449 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5450 "MatConvert_MPIAIJ_MPIAIJPERM", 5451 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5452 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5453 "MatConvert_MPIAIJ_MPIAIJCRL", 5454 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5455 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5456 "MatConvert_MPIAIJ_MPISBAIJ", 5457 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5458 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5459 "MatMatMult_MPIDense_MPIAIJ", 5460 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5461 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5462 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5463 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5464 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5465 "MatMatMultNumeric_MPIDense_MPIAIJ", 5466 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5467 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5468 PetscFunctionReturn(0); 5469 } 5470 EXTERN_C_END 5471 5472 #undef __FUNCT__ 5473 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5474 /*@ 5475 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5476 and "off-diagonal" part of the matrix in CSR format. 5477 5478 Collective on MPI_Comm 5479 5480 Input Parameters: 5481 + comm - MPI communicator 5482 . m - number of local rows (Cannot be PETSC_DECIDE) 5483 . n - This value should be the same as the local size used in creating the 5484 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5485 calculated if N is given) For square matrices n is almost always m. 5486 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5487 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5488 . i - row indices for "diagonal" portion of matrix 5489 . j - column indices 5490 . a - matrix values 5491 . oi - row indices for "off-diagonal" portion of matrix 5492 . oj - column indices 5493 - oa - matrix values 5494 5495 Output Parameter: 5496 . mat - the matrix 5497 5498 Level: advanced 5499 5500 Notes: 5501 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5502 must free the arrays once the matrix has been destroyed and not before. 5503 5504 The i and j indices are 0 based 5505 5506 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5507 5508 This sets local rows and cannot be used to set off-processor values. 5509 5510 You cannot later use MatSetValues() to change values in this matrix. 
5511 5512 .keywords: matrix, aij, compressed row, sparse, parallel 5513 5514 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5515 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5516 @*/ 5517 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5518 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5519 { 5520 PetscErrorCode ierr; 5521 Mat_MPIAIJ *maij; 5522 5523 PetscFunctionBegin; 5524 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5525 if (i[0]) { 5526 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5527 } 5528 if (oi[0]) { 5529 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5530 } 5531 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5532 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5533 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5534 maij = (Mat_MPIAIJ*) (*mat)->data; 5535 maij->donotstash = PETSC_TRUE; 5536 (*mat)->preallocated = PETSC_TRUE; 5537 5538 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5539 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5540 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5541 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5542 5543 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5544 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5545 5546 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5547 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5548 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5549 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5550 5551 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5552 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5553 PetscFunctionReturn(0); 5554 } 5555 5556 /* 5557 Special version for direct calls from Fortran 5558 */ 5559 #include "private/fortranimpl.h" 5560 5561 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5562 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5563 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5564 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5565 #endif 5566 5567 /* Change these macros so can be used in void function */ 5568 #undef CHKERRQ 5569 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5570 #undef SETERRQ2 5571 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5572 #undef SETERRQ 5573 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5574 5575 EXTERN_C_BEGIN 5576 #undef __FUNCT__ 5577 #define __FUNCT__ "matsetvaluesmpiaij_" 5578 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5579 { 5580 Mat mat = *mmat; 5581 PetscInt m = *mm, n = *mn; 5582 InsertMode addv = *maddv; 5583 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5584 PetscScalar value; 5585 PetscErrorCode ierr; 5586 5587 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5588 if (mat->insertmode == NOT_SET_VALUES) { 5589 mat->insertmode = addv; 5590 } 5591 #if defined(PETSC_USE_DEBUG) 5592 else if (mat->insertmode != addv) { 5593 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5594 } 5595 #endif 5596 { 5597 
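  /*
     The block below follows the same logic as MatSetValues_MPIAIJ(): it caches the CSR arrays of
     the local diagonal block (aij->A) and off-diagonal block (aij->B) so that the
     MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros can insert
     entries directly, avoiding per-entry function-call overhead in this Fortran entry point.
  */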
PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5598 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5599 PetscBool roworiented = aij->roworiented; 5600 5601 /* Some Variables required in the macro */ 5602 Mat A = aij->A; 5603 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5604 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5605 MatScalar *aa = a->a; 5606 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5607 Mat B = aij->B; 5608 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5609 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5610 MatScalar *ba = b->a; 5611 5612 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5613 PetscInt nonew = a->nonew; 5614 MatScalar *ap1,*ap2; 5615 5616 PetscFunctionBegin; 5617 for (i=0; i<m; i++) { 5618 if (im[i] < 0) continue; 5619 #if defined(PETSC_USE_DEBUG) 5620 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5621 #endif 5622 if (im[i] >= rstart && im[i] < rend) { 5623 row = im[i] - rstart; 5624 lastcol1 = -1; 5625 rp1 = aj + ai[row]; 5626 ap1 = aa + ai[row]; 5627 rmax1 = aimax[row]; 5628 nrow1 = ailen[row]; 5629 low1 = 0; 5630 high1 = nrow1; 5631 lastcol2 = -1; 5632 rp2 = bj + bi[row]; 5633 ap2 = ba + bi[row]; 5634 rmax2 = bimax[row]; 5635 nrow2 = bilen[row]; 5636 low2 = 0; 5637 high2 = nrow2; 5638 5639 for (j=0; j<n; j++) { 5640 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5641 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5642 if (in[j] >= cstart && in[j] < cend){ 5643 col = in[j] - cstart; 5644 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5645 } else if (in[j] < 0) continue; 5646 #if defined(PETSC_USE_DEBUG) 5647 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5648 #endif 5649 else { 5650 if (mat->was_assembled) { 5651 if (!aij->colmap) { 5652 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5653 } 5654 #if defined (PETSC_USE_CTABLE) 5655 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5656 col--; 5657 #else 5658 col = aij->colmap[in[j]] - 1; 5659 #endif 5660 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5661 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5662 col = in[j]; 5663 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5664 B = aij->B; 5665 b = (Mat_SeqAIJ*)B->data; 5666 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5667 rp2 = bj + bi[row]; 5668 ap2 = ba + bi[row]; 5669 rmax2 = bimax[row]; 5670 nrow2 = bilen[row]; 5671 low2 = 0; 5672 high2 = nrow2; 5673 bm = aij->B->rmap->n; 5674 ba = b->a; 5675 } 5676 } else col = in[j]; 5677 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5678 } 5679 } 5680 } else { 5681 if (!aij->donotstash) { 5682 if (roworiented) { 5683 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5684 } else { 5685 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5686 } 5687 } 5688 } 5689 }} 5690 PetscFunctionReturnVoid(); 5691 } 5692 EXTERN_C_END 5693 5694