#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
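/*
   Usage sketch (illustrative only, not part of the original file): a caller that
   extracts the rows of an assembled MATMPIAIJ matrix that contain at least one
   nonzero.  The matrix M here is hypothetical.
*/
#if 0
{
  Mat M;          /* an assembled MATMPIAIJ matrix */
  IS  keptrows;   /* global indices of the locally owned rows with a nonzero */

  ierr = MatFindNonZeroRows_MPIAIJ(M,&keptrows);CHKERRQ(ierr);
  if (keptrows) {  /* stays NULL when no process has an entirely zero row */
    ierr = ISView(keptrows,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
    ierr = ISDestroy(keptrows);CHKERRQ(ierr);
  }
}
#endif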

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore = 0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr       = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr       = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr       = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr       = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr       = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine numbers of diagonal and off-diagonal entries */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine numbers of diagonal and off-diagonal entries */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
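/*
   Usage sketch (illustrative only): distributing a sequential matrix built on
   rank 0 across a communicator, then later refreshing just the numerical values.
   gmat, comm and mlocal are hypothetical; gmat is only significant on rank 0 and
   mlocal is this process's share of the rows.
*/
#if 0
{
  Mat      gmat;    /* SeqAIJ matrix, fully assembled on rank 0 */
  Mat      dmat;    /* parallel result */
  PetscInt mlocal;  /* local row count on this process */

  ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
  /* column indices are now fixed; a second call moves over new values only */
  ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
}
#endif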

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
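/*
   Usage sketch (illustrative only): translating a global column index gcol into a
   local index in the off-diagonal block B, mirroring the lookups performed in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below.  The 1-based shift lets a
   zero table/array entry mean "column not present on this process".  gcol and col
   are hypothetical.
*/
#if 0
{
  PetscInt gcol,col;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
  col--;                          /* col is now -1 if gcol is absent */
#else
  col = aij->colmap[gcol] - 1;    /* likewise -1 if gcol is absent */
#endif
}
#endif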

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; \
  else                 high1 = nrow1; \
  lastcol1 = col; \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1  = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i]  = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; \
  else                 high2 = nrow2; \
  lastcol2 = col; \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2  = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i]  = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
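/*
   Usage sketch (illustrative only): judging from the three PetscMemcpy() calls
   above, MatSetValuesRow_MPIAIJ() expects v[] to hold a full stored row in global
   column order: the off-diagonal entries left of the owned column block, then the
   diagonal-block entries, then the off-diagonal entries to the right.  grow and
   vals are hypothetical.
*/
#if 0
{
  PetscInt    grow;   /* a globally numbered, locally owned row */
  PetscScalar *vals;  /* one value per stored nonzero of that row, in column order */

  ierr = MatSetValuesRow_MPIAIJ(A,grow,vals);CHKERRQ(ierr);
}
#endif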

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
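/*
   Usage sketch (illustrative only): setting and reading back a 2x2 block of
   entries.  Note that MatGetValues_MPIAIJ() above only supports rows owned by the
   calling process, and reads must happen after assembly.  The index and value
   arrays here are hypothetical.
*/
#if 0
{
  PetscInt    rows[2],cols[2];
  PetscScalar vals[4];

  ierr = MatSetValues(mat,2,rows,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* rows[] must be locally owned for MatGetValues() */
  ierr = MatGetValues(mat,2,rows,2,cols,vals);CHKERRQ(ierr);
}
#endif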

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
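/*
   Usage sketch (illustrative only): the intended call pattern for the Begin/End
   pair above, with other work optionally overlapped while the stashed off-process
   entries are in flight.  ComputeSomethingElse() is a hypothetical placeholder.
*/
#if 0
{
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = ComputeSomethingElse();CHKERRQ(ierr);  /* hypothetical overlapped work */
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
}
#endif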

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr); aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart = A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;
  for (i=0; i<size; i++) { nsends += nprocs[2*i+1]; }

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2]; }
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2]; }
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
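/*
   Usage sketch (illustrative only): applying Dirichlet boundary conditions by
   zeroing rows, keeping a unit diagonal, and fixing up the right-hand side so
   that b = diag*x in those rows, matching the x/b handling above.  nbc, bcrows,
   x and b are hypothetical.
*/
#if 0
{
  PetscInt nbc,*bcrows;  /* global indices of boundary rows; any process may list any row */
  Vec      x,b;          /* x holds the boundary values; b is the right-hand side */

  ierr = MatZeroRows(A,nbc,bcrows,1.0,x,b);CHKERRQ(ierr);
}
#endif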

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;
  for (i=0; i<size; i++) { nsends += nprocs[2*i+1]; }

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2]; }
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2]; }
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(xmask);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecGetArray(b,&bb);CHKERRQ(ierr);

  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          bb[*ridx] -= *aa*xx[*aj];
          *aa        = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          bb[i] -= *aa*xx[*aj];
          *aa    = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }

  ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecDestroy(lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
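/*
   Note (illustrative, not part of the original source): MatMult_MPIAIJ and
   MatMultAdd_MPIAIJ above deliberately split the ghost-value scatter so that the
   diagonal-block product, which needs no ghost data, hides the communication:

     VecScatterBegin()   start moving the needed ghost entries of xx into a->lvec
     A_diag multiply     purely local work overlaps the messages
     VecScatterEnd()     ghost entries now complete in a->lvec
     B_offdiag multadd   consumes a->lvec and accumulates into the result
*/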

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data;
  Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  if (aij->A) {ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
  if (aij->B) {ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range = 0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
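/*
   Note (illustrative, not part of the original source): the three write phases
   above share one pattern: rank 0 writes its own chunk, then paces the other
   ranks with the PetscViewerFlowControl helpers so only a bounded number of
   messages are in flight while it receives and writes each rank's chunk in turn.
*/
#if 0
/* skeleton of the master/worker flow-control exchange used above */
ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
if (!rank) {
  /* ... write local chunk ... */
  for (i=1; i<size; i++) {
    ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
    /* ... MPI_Recv() the chunk from rank i and write it ... */
  }
  ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
} else {
  ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
  /* ... MPI_Send() the local chunk to rank 0 ... */
  ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
}
#endif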

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject)mat)->prefix,"-mat_ascii_output_large",&flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII() */
      PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1489 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1490 if (iascii || isdraw || isbinary || issocket) { 1491 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1492 } else { 1493 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1494 } 1495 PetscFunctionReturn(0); 1496 } 1497 1498 #undef __FUNCT__ 1499 #define __FUNCT__ "MatSOR_MPIAIJ" 1500 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1501 { 1502 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1503 PetscErrorCode ierr; 1504 Vec bb1 = 0; 1505 PetscBool hasop; 1506 1507 PetscFunctionBegin; 1508 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1509 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1510 } 1511 1512 if (flag == SOR_APPLY_UPPER) { 1513 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1514 PetscFunctionReturn(0); 1515 } 1516 1517 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1518 if (flag & SOR_ZERO_INITIAL_GUESS) { 1519 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1520 its--; 1521 } 1522 1523 while (its--) { 1524 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1525 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1526 1527 /* update rhs: bb1 = bb - B*x */ 1528 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1529 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1530 1531 /* local sweep */ 1532 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1533 } 1534 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1535 if (flag & SOR_ZERO_INITIAL_GUESS) { 1536 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1537 its--; 1538 } 1539 while (its--) { 1540 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1541 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1542 1543 /* update rhs: bb1 = bb - B*x */ 1544 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1545 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1546 1547 /* local sweep */ 1548 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1549 } 1550 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1551 if (flag & SOR_ZERO_INITIAL_GUESS) { 1552 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1553 its--; 1554 } 1555 while (its--) { 1556 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1557 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1558 1559 /* update rhs: bb1 = bb - B*x */ 1560 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1561 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1562 1563 /* local sweep */ 1564 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1565 } 1566 } else if (flag & SOR_EISENSTAT) { 1567 Vec xx1; 1568 1569 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1570 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1571 1572 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1573 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1574 if (!mat->diag) { 1575 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1576 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1577 } 1578 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1579 if (hasop) { 1580 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1581 } else { 1582 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1583 } 1584 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1585 1586 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1587 1588 /* local sweep */ 1589 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1590 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1591 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1592 } else { 1593 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1594 } 1595 1596 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatPermute_MPIAIJ" 1602 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1603 { 1604 MPI_Comm comm,pcomm; 1605 PetscInt first,local_size,nrows; 1606 const PetscInt *rows; 1607 PetscMPIInt size; 1608 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1609 PetscErrorCode ierr; 1610 1611 PetscFunctionBegin; 1612 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1613 /* make a collective version of 'rowp' */ 1614 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1615 if (pcomm==comm) { 1616 crowp = rowp; 1617 } else { 1618 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1619 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1620 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 1621 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1622 } 1623 /* collect the global row permutation and invert it */ 1624 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1625 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1626 if (pcomm!=comm) { 1627 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1628 } 1629 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1630 /* get the local target indices */ 1631 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1632 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1633 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1634 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1635 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1636 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1637 /* the column permutation is so much easier; 1638 make a local version of 'colp' and invert it */ 1639 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1640 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1641 if (size==1) { 1642 lcolp = colp; 1643 } else { 1644 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1645 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1646 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 1647 } 1648 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1649 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1650 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1651 if (size>1) { 1652 ierr = 
ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1653 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1654 } 1655 /* now we just get the submatrix */ 1656 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1657 /* clean up */ 1658 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1659 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1660 PetscFunctionReturn(0); 1661 } 1662 1663 #undef __FUNCT__ 1664 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1665 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1666 { 1667 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1668 Mat A = mat->A,B = mat->B; 1669 PetscErrorCode ierr; 1670 PetscReal isend[5],irecv[5]; 1671 1672 PetscFunctionBegin; 1673 info->block_size = 1.0; 1674 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1675 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1676 isend[3] = info->memory; isend[4] = info->mallocs; 1677 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1678 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1679 isend[3] += info->memory; isend[4] += info->mallocs; 1680 if (flag == MAT_LOCAL) { 1681 info->nz_used = isend[0]; 1682 info->nz_allocated = isend[1]; 1683 info->nz_unneeded = isend[2]; 1684 info->memory = isend[3]; 1685 info->mallocs = isend[4]; 1686 } else if (flag == MAT_GLOBAL_MAX) { 1687 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1688 info->nz_used = irecv[0]; 1689 info->nz_allocated = irecv[1]; 1690 info->nz_unneeded = irecv[2]; 1691 info->memory = irecv[3]; 1692 info->mallocs = irecv[4]; 1693 } else if (flag == MAT_GLOBAL_SUM) { 1694 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1695 info->nz_used = irecv[0]; 1696 info->nz_allocated = irecv[1]; 1697 info->nz_unneeded = irecv[2]; 1698 info->memory = irecv[3]; 1699 info->mallocs = irecv[4]; 1700 } 1701 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1702 info->fill_ratio_needed = 0; 1703 info->factor_mallocs = 0; 1704 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatSetOption_MPIAIJ" 1710 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1711 { 1712 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 switch (op) { 1717 case MAT_NEW_NONZERO_LOCATIONS: 1718 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1719 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1720 case MAT_KEEP_NONZERO_PATTERN: 1721 case MAT_NEW_NONZERO_LOCATION_ERR: 1722 case MAT_USE_INODES: 1723 case MAT_IGNORE_ZERO_ENTRIES: 1724 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1725 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1726 break; 1727 case MAT_ROW_ORIENTED: 1728 a->roworiented = flg; 1729 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1730 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1731 break; 1732 case MAT_NEW_DIAGONALS: 1733 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1734 break; 1735 case MAT_IGNORE_OFF_PROC_ENTRIES: 1736 a->donotstash = PETSC_TRUE; 1737 break; 1738 case MAT_SPD: 1739 A->spd_set = PETSC_TRUE; 1740 A->spd = flg; 1741 if (flg) { 1742 A->symmetric = PETSC_TRUE; 1743 A->structurally_symmetric = PETSC_TRUE; 1744 A->symmetric_set = PETSC_TRUE; 1745 A->structurally_symmetric_set = PETSC_TRUE; 1746 } 1747 break; 1748 case MAT_SYMMETRIC: 1749 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1750 break; 1751 case 
MAT_STRUCTURALLY_SYMMETRIC: 1752 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1753 break; 1754 case MAT_HERMITIAN: 1755 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1756 break; 1757 case MAT_SYMMETRY_ETERNAL: 1758 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1759 break; 1760 default: 1761 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1762 } 1763 PetscFunctionReturn(0); 1764 } 1765 1766 #undef __FUNCT__ 1767 #define __FUNCT__ "MatGetRow_MPIAIJ" 1768 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1769 { 1770 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1771 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1772 PetscErrorCode ierr; 1773 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1774 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1775 PetscInt *cmap,*idx_p; 1776 1777 PetscFunctionBegin; 1778 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1779 mat->getrowactive = PETSC_TRUE; 1780 1781 if (!mat->rowvalues && (idx || v)) { 1782 /* 1783 allocate enough space to hold information from the longest row. 1784 */ 1785 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1786 PetscInt max = 1,tmp; 1787 for (i=0; i<matin->rmap->n; i++) { 1788 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1789 if (max < tmp) { max = tmp; } 1790 } 1791 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1792 } 1793 1794 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1795 lrow = row - rstart; 1796 1797 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1798 if (!v) {pvA = 0; pvB = 0;} 1799 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1800 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1801 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1802 nztot = nzA + nzB; 1803 1804 cmap = mat->garray; 1805 if (v || idx) { 1806 if (nztot) { 1807 /* Sort by increasing column numbers, assuming A and B already sorted */ 1808 PetscInt imark = -1; 1809 if (v) { 1810 *v = v_p = mat->rowvalues; 1811 for (i=0; i<nzB; i++) { 1812 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1813 else break; 1814 } 1815 imark = i; 1816 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1817 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1818 } 1819 if (idx) { 1820 *idx = idx_p = mat->rowindices; 1821 if (imark > -1) { 1822 for (i=0; i<imark; i++) { 1823 idx_p[i] = cmap[cworkB[i]]; 1824 } 1825 } else { 1826 for (i=0; i<nzB; i++) { 1827 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1828 else break; 1829 } 1830 imark = i; 1831 } 1832 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1833 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1834 } 1835 } else { 1836 if (idx) *idx = 0; 1837 if (v) *v = 0; 1838 } 1839 } 1840 *nz = nztot; 1841 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1842 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1843 PetscFunctionReturn(0); 1844 } 1845 1846 #undef __FUNCT__ 1847 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1848 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1849 { 1850 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1851 1852 PetscFunctionBegin; 1853 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must 
be called first"); 1854 aij->getrowactive = PETSC_FALSE; 1855 PetscFunctionReturn(0); 1856 } 1857 1858 #undef __FUNCT__ 1859 #define __FUNCT__ "MatNorm_MPIAIJ" 1860 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1861 { 1862 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1863 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1864 PetscErrorCode ierr; 1865 PetscInt i,j,cstart = mat->cmap->rstart; 1866 PetscReal sum = 0.0; 1867 MatScalar *v; 1868 1869 PetscFunctionBegin; 1870 if (aij->size == 1) { 1871 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1872 } else { 1873 if (type == NORM_FROBENIUS) { 1874 v = amat->a; 1875 for (i=0; i<amat->nz; i++) { 1876 #if defined(PETSC_USE_COMPLEX) 1877 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1878 #else 1879 sum += (*v)*(*v); v++; 1880 #endif 1881 } 1882 v = bmat->a; 1883 for (i=0; i<bmat->nz; i++) { 1884 #if defined(PETSC_USE_COMPLEX) 1885 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1886 #else 1887 sum += (*v)*(*v); v++; 1888 #endif 1889 } 1890 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1891 *norm = sqrt(*norm); 1892 } else if (type == NORM_1) { /* max column norm */ 1893 PetscReal *tmp,*tmp2; 1894 PetscInt *jj,*garray = aij->garray; 1895 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1896 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1897 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1898 *norm = 0.0; 1899 v = amat->a; jj = amat->j; 1900 for (j=0; j<amat->nz; j++) { 1901 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1902 } 1903 v = bmat->a; jj = bmat->j; 1904 for (j=0; j<bmat->nz; j++) { 1905 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1906 } 1907 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1908 for (j=0; j<mat->cmap->N; j++) { 1909 if (tmp2[j] > *norm) *norm = tmp2[j]; 1910 } 1911 ierr = PetscFree(tmp);CHKERRQ(ierr); 1912 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1913 } else if (type == NORM_INFINITY) { /* max row norm */ 1914 PetscReal ntemp = 0.0; 1915 for (j=0; j<aij->A->rmap->n; j++) { 1916 v = amat->a + amat->i[j]; 1917 sum = 0.0; 1918 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1919 sum += PetscAbsScalar(*v); v++; 1920 } 1921 v = bmat->a + bmat->i[j]; 1922 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1923 sum += PetscAbsScalar(*v); v++; 1924 } 1925 if (sum > ntemp) ntemp = sum; 1926 } 1927 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1928 } else { 1929 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1930 } 1931 } 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "MatTranspose_MPIAIJ" 1937 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1938 { 1939 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1940 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1941 PetscErrorCode ierr; 1942 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1943 PetscInt cstart=A->cmap->rstart,ncol; 1944 Mat B; 1945 MatScalar *array; 1946 1947 PetscFunctionBegin; 1948 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1949 1950 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1951 ai = Aloc->i; aj = Aloc->j; 1952 bi = Bloc->i; bj = Bloc->j; 1953 if 
(reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++){
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  array = Aloc->a;
  row = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */

  /* copy over the B part */
  ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation.
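       The forward scatter of rr into lvec is only begun here; the matching
       VecScatterEnd() is issued after the left scaling of b and the scaling
       of the diagonal block below, so the communication proceeds while that
       local work is done.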
*/ 2021 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2022 } 2023 if (ll) { 2024 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2025 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2026 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2027 } 2028 /* scale the diagonal block */ 2029 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2030 2031 if (rr) { 2032 /* Do a scatter end and then right scale the off-diagonal block */ 2033 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2034 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2035 } 2036 2037 PetscFunctionReturn(0); 2038 } 2039 2040 #undef __FUNCT__ 2041 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2042 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2043 { 2044 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2045 PetscErrorCode ierr; 2046 2047 PetscFunctionBegin; 2048 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2049 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2050 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2051 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2052 PetscFunctionReturn(0); 2053 } 2054 #undef __FUNCT__ 2055 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2056 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2057 { 2058 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2059 PetscErrorCode ierr; 2060 2061 PetscFunctionBegin; 2062 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2063 PetscFunctionReturn(0); 2064 } 2065 2066 #undef __FUNCT__ 2067 #define __FUNCT__ "MatEqual_MPIAIJ" 2068 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2069 { 2070 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2071 Mat a,b,c,d; 2072 PetscBool flg; 2073 PetscErrorCode ierr; 2074 2075 PetscFunctionBegin; 2076 a = matA->A; b = matA->B; 2077 c = matB->A; d = matB->B; 2078 2079 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2080 if (flg) { 2081 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2082 } 2083 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2084 PetscFunctionReturn(0); 2085 } 2086 2087 #undef __FUNCT__ 2088 #define __FUNCT__ "MatCopy_MPIAIJ" 2089 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2090 { 2091 PetscErrorCode ierr; 2092 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2093 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2094 2095 PetscFunctionBegin; 2096 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2097 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2098 /* because of the column compression in the off-processor part of the matrix a->B, 2099 the number of columns in a->B and b->B may be different, hence we cannot call 2100 the MatCopy() directly on the two parts. 
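       (When str is SAME_NONZERO_PATTERN and both matrices use this copy
       implementation, the two compressions coincide and the per-part
       MatCopy() calls in the else branch below are safe.)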
If need be, we can provide a more 2101 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2102 then copying the submatrices */ 2103 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2104 } else { 2105 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2106 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2107 } 2108 PetscFunctionReturn(0); 2109 } 2110 2111 #undef __FUNCT__ 2112 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2113 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2114 { 2115 PetscErrorCode ierr; 2116 2117 PetscFunctionBegin; 2118 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2119 PetscFunctionReturn(0); 2120 } 2121 2122 #undef __FUNCT__ 2123 #define __FUNCT__ "MatAXPY_MPIAIJ" 2124 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2125 { 2126 PetscErrorCode ierr; 2127 PetscInt i; 2128 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2129 PetscBLASInt bnz,one=1; 2130 Mat_SeqAIJ *x,*y; 2131 2132 PetscFunctionBegin; 2133 if (str == SAME_NONZERO_PATTERN) { 2134 PetscScalar alpha = a; 2135 x = (Mat_SeqAIJ *)xx->A->data; 2136 y = (Mat_SeqAIJ *)yy->A->data; 2137 bnz = PetscBLASIntCast(x->nz); 2138 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2139 x = (Mat_SeqAIJ *)xx->B->data; 2140 y = (Mat_SeqAIJ *)yy->B->data; 2141 bnz = PetscBLASIntCast(x->nz); 2142 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2143 } else if (str == SUBSET_NONZERO_PATTERN) { 2144 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2145 2146 x = (Mat_SeqAIJ *)xx->B->data; 2147 y = (Mat_SeqAIJ *)yy->B->data; 2148 if (y->xtoy && y->XtoY != xx->B) { 2149 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2150 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2151 } 2152 if (!y->xtoy) { /* get xtoy */ 2153 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2154 y->XtoY = xx->B; 2155 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2156 } 2157 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2158 } else { 2159 Mat B; 2160 PetscInt *nnz_d,*nnz_o; 2161 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2162 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2163 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2164 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2165 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2166 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2167 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2168 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2169 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 2170 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2171 ierr = MatHeaderReplace(Y,B); 2172 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2173 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2174 } 2175 PetscFunctionReturn(0); 2176 } 2177 2178 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2179 2180 #undef __FUNCT__ 2181 #define __FUNCT__ "MatConjugate_MPIAIJ" 2182 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2183 { 2184 #if defined(PETSC_USE_COMPLEX) 2185 PetscErrorCode ierr; 2186 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2187 2188 PetscFunctionBegin; 2189 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2190 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2191 #else 2192 PetscFunctionBegin; 2193 #endif 2194 PetscFunctionReturn(0); 
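  /* for real scalars conjugation is a no-op, so the PETSC_USE_COMPLEX guard above leaves this function body empty in real builds */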
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
{
  namespace graph_dist = boost::graph::distributed;

  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type*   lgraph_p;
  PetscContainer c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
  ierr = VecCopy(b, x);CHKERRQ(ierr);

  PetscScalar* array_x;
  ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
  PetscInt sx;
  ierr = VecGetSize(x, &sx);CHKERRQ(ierr);

  PetscScalar* array_b;
  ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
  PetscInt sb;
  ierr = VecGetSize(b, &sb);CHKERRQ(ierr);

  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
  array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
                 ref_x(array_x, boost::extents[num_vertices(graph)]);

  typedef boost::iterator_property_map<array_ref_type::iterator,
                                       boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
  gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
               vector_x(ref_x.begin(), get(boost::vertex_index, graph));

  ilu_set_solve(*lgraph_p, vector_b, vector_x);

  /* release the array access acquired above */
  ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr);
  ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;
  PetscMPIInt    *send_rank,*recv_rank;
  PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
  PetscScalar    *sbuf_a,**rbuf_a;
  PetscErrorCode (*MatDestroy)(Mat);
} Mat_Redundant;

#undef __FUNCT__
#define __FUNCT__ "PetscContainerDestroy_MatRedundant"
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund=(Mat_Redundant*)ptr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
  for (i=0; i<redund->nrecvs; i++){
    ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
    ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
  ierr = PetscFree(redund);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatRedundant"
PetscErrorCode MatDestroy_MatRedundant(Mat A)
{
  PetscErrorCode ierr;
  PetscContainer container;
  Mat_Redundant  *redund=PETSC_NULL;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
  }
  A->ops->destroy = redund->MatDestroy;
  ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
Wrong nzlocal"); 2435 2436 nsends = redund->nsends; 2437 nrecvs = redund->nrecvs; 2438 send_rank = redund->send_rank; 2439 recv_rank = redund->recv_rank; 2440 sbuf_nz = redund->sbuf_nz; 2441 rbuf_nz = redund->rbuf_nz; 2442 sbuf_j = redund->sbuf_j; 2443 sbuf_a = redund->sbuf_a; 2444 rbuf_j = redund->rbuf_j; 2445 rbuf_a = redund->rbuf_a; 2446 } 2447 2448 if (reuse == MAT_INITIAL_MATRIX){ 2449 PetscMPIInt subrank,subsize; 2450 PetscInt nleftover,np_subcomm; 2451 /* get the destination processors' id send_rank, nsends and nrecvs */ 2452 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2453 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2454 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2455 np_subcomm = size/nsubcomm; 2456 nleftover = size - nsubcomm*np_subcomm; 2457 nsends = 0; nrecvs = 0; 2458 for (i=0; i<size; i++){ /* i=rank*/ 2459 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2460 send_rank[nsends] = i; nsends++; 2461 recv_rank[nrecvs++] = i; 2462 } 2463 } 2464 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2465 i = size-nleftover-1; 2466 j = 0; 2467 while (j < nsubcomm - nleftover){ 2468 send_rank[nsends++] = i; 2469 i--; j++; 2470 } 2471 } 2472 2473 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2474 for (i=0; i<nleftover; i++){ 2475 recv_rank[nrecvs++] = size-nleftover+i; 2476 } 2477 } 2478 2479 /* allocate sbuf_j, sbuf_a */ 2480 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2481 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2482 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2483 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2484 2485 /* copy mat's local entries into the buffers */ 2486 if (reuse == MAT_INITIAL_MATRIX){ 2487 rownz_max = 0; 2488 rptr = sbuf_j; 2489 cols = sbuf_j + rend-rstart + 1; 2490 vals = sbuf_a; 2491 rptr[0] = 0; 2492 for (i=0; i<rend-rstart; i++){ 2493 row = i + rstart; 2494 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2495 ncols = nzA + nzB; 2496 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2497 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2498 /* load the column indices for this row into cols */ 2499 lwrite = 0; 2500 for (l=0; l<nzB; l++) { 2501 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2502 vals[lwrite] = aworkB[l]; 2503 cols[lwrite++] = ctmp; 2504 } 2505 } 2506 for (l=0; l<nzA; l++){ 2507 vals[lwrite] = aworkA[l]; 2508 cols[lwrite++] = cstart + cworkA[l]; 2509 } 2510 for (l=0; l<nzB; l++) { 2511 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2512 vals[lwrite] = aworkB[l]; 2513 cols[lwrite++] = ctmp; 2514 } 2515 } 2516 vals += ncols; 2517 cols += ncols; 2518 rptr[i+1] = rptr[i] + ncols; 2519 if (rownz_max < ncols) rownz_max = ncols; 2520 } 2521 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2522 } else { /* only copy matrix values into sbuf_a */ 2523 rptr = sbuf_j; 2524 vals = sbuf_a; 2525 rptr[0] = 0; 2526 for (i=0; i<rend-rstart; i++){ 2527 row = i + rstart; 2528 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2529 ncols = nzA + nzB; 2530 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2531 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2532 lwrite = 0; 2533 for (l=0; l<nzB; l++) { 2534 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2535 } 2536 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2537 for 
(l=0; l<nzB; l++) { 2538 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2539 } 2540 vals += ncols; 2541 rptr[i+1] = rptr[i] + ncols; 2542 } 2543 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2544 2545 /* send nzlocal to others, and recv other's nzlocal */ 2546 /*--------------------------------------------------*/ 2547 if (reuse == MAT_INITIAL_MATRIX){ 2548 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2549 s_waits2 = s_waits3 + nsends; 2550 s_waits1 = s_waits2 + nsends; 2551 r_waits1 = s_waits1 + nsends; 2552 r_waits2 = r_waits1 + nrecvs; 2553 r_waits3 = r_waits2 + nrecvs; 2554 } else { 2555 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2556 r_waits3 = s_waits3 + nsends; 2557 } 2558 2559 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2560 if (reuse == MAT_INITIAL_MATRIX){ 2561 /* get new tags to keep the communication clean */ 2562 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2563 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2564 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2565 2566 /* post receives of other's nzlocal */ 2567 for (i=0; i<nrecvs; i++){ 2568 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2569 } 2570 /* send nzlocal to others */ 2571 for (i=0; i<nsends; i++){ 2572 sbuf_nz[i] = nzlocal; 2573 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2574 } 2575 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2576 count = nrecvs; 2577 while (count) { 2578 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2579 recv_rank[imdex] = recv_status.MPI_SOURCE; 2580 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2581 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2582 2583 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2584 rbuf_nz[imdex] += i + 2; 2585 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2586 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2587 count--; 2588 } 2589 /* wait on sends of nzlocal */ 2590 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2591 /* send mat->i,j to others, and recv from other's */ 2592 /*------------------------------------------------*/ 2593 for (i=0; i<nsends; i++){ 2594 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2595 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2596 } 2597 /* wait on receives of mat->i,j */ 2598 /*------------------------------*/ 2599 count = nrecvs; 2600 while (count) { 2601 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2602 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2603 count--; 2604 } 2605 /* wait on sends of mat->i,j */ 2606 /*---------------------------*/ 2607 if (nsends) { 2608 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2609 } 2610 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2611 2612 /* post receives, send and receive mat->a */ 2613 
/*----------------------------------------*/ 2614 for (imdex=0; imdex<nrecvs; imdex++) { 2615 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2616 } 2617 for (i=0; i<nsends; i++){ 2618 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2619 } 2620 count = nrecvs; 2621 while (count) { 2622 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2623 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2624 count--; 2625 } 2626 if (nsends) { 2627 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2628 } 2629 2630 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2631 2632 /* create redundant matrix */ 2633 /*-------------------------*/ 2634 if (reuse == MAT_INITIAL_MATRIX){ 2635 /* compute rownz_max for preallocation */ 2636 for (imdex=0; imdex<nrecvs; imdex++){ 2637 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2638 rptr = rbuf_j[imdex]; 2639 for (i=0; i<j; i++){ 2640 ncols = rptr[i+1] - rptr[i]; 2641 if (rownz_max < ncols) rownz_max = ncols; 2642 } 2643 } 2644 2645 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2646 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2647 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2648 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2649 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2650 } else { 2651 C = *matredundant; 2652 } 2653 2654 /* insert local matrix entries */ 2655 rptr = sbuf_j; 2656 cols = sbuf_j + rend-rstart + 1; 2657 vals = sbuf_a; 2658 for (i=0; i<rend-rstart; i++){ 2659 row = i + rstart; 2660 ncols = rptr[i+1] - rptr[i]; 2661 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2662 vals += ncols; 2663 cols += ncols; 2664 } 2665 /* insert received matrix entries */ 2666 for (imdex=0; imdex<nrecvs; imdex++){ 2667 rstart = rowrange[recv_rank[imdex]]; 2668 rend = rowrange[recv_rank[imdex]+1]; 2669 rptr = rbuf_j[imdex]; 2670 cols = rbuf_j[imdex] + rend-rstart + 1; 2671 vals = rbuf_a[imdex]; 2672 for (i=0; i<rend-rstart; i++){ 2673 row = i + rstart; 2674 ncols = rptr[i+1] - rptr[i]; 2675 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2676 vals += ncols; 2677 cols += ncols; 2678 } 2679 } 2680 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2681 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2682 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2683 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2684 if (reuse == MAT_INITIAL_MATRIX){ 2685 PetscContainer container; 2686 *matredundant = C; 2687 /* create a supporting struct and attach it to C for reuse */ 2688 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2689 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2690 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2691 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2692 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2693 2694 redund->nzlocal = nzlocal; 2695 redund->nsends = nsends; 2696 redund->nrecvs = nrecvs; 2697 redund->send_rank = send_rank; 2698 redund->recv_rank = recv_rank; 
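    /* cache the communication pattern and buffers on C so that a later call with
       MAT_REUSE_MATRIX can skip the symbolic exchange above and resend values only */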
redund->sbuf_nz   = sbuf_nz;
    redund->rbuf_nz   = rbuf_nz;
    redund->sbuf_j    = sbuf_j;
    redund->sbuf_a    = sbuf_a;
    redund->rbuf_j    = rbuf_j;
    redund->rbuf_a    = rbuf_a;

    redund->MatDestroy = C->ops->destroy;
    C->ops->destroy    = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) { /* loop over the local rows, as in MatGetRowMaxAbs_MPIAIJ() above */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  /* the work vectors are purely local, so they must be created on PETSC_COMM_SELF */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(diagV);CHKERRQ(ierr);
  ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(diagV);CHKERRQ(ierr);
  ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
2911 MatGetDiagonal_MPIAIJ, 2912 MatDiagonalScale_MPIAIJ, 2913 MatNorm_MPIAIJ, 2914 /*20*/ MatAssemblyBegin_MPIAIJ, 2915 MatAssemblyEnd_MPIAIJ, 2916 MatSetOption_MPIAIJ, 2917 MatZeroEntries_MPIAIJ, 2918 /*24*/ MatZeroRows_MPIAIJ, 2919 0, 2920 #ifdef PETSC_HAVE_PBGL 2921 0, 2922 #else 2923 0, 2924 #endif 2925 0, 2926 0, 2927 /*29*/ MatSetUpPreallocation_MPIAIJ, 2928 #ifdef PETSC_HAVE_PBGL 2929 0, 2930 #else 2931 0, 2932 #endif 2933 0, 2934 0, 2935 0, 2936 /*34*/ MatDuplicate_MPIAIJ, 2937 0, 2938 0, 2939 0, 2940 0, 2941 /*39*/ MatAXPY_MPIAIJ, 2942 MatGetSubMatrices_MPIAIJ, 2943 MatIncreaseOverlap_MPIAIJ, 2944 MatGetValues_MPIAIJ, 2945 MatCopy_MPIAIJ, 2946 /*44*/ MatGetRowMax_MPIAIJ, 2947 MatScale_MPIAIJ, 2948 0, 2949 0, 2950 MatZeroRowsColumns_MPIAIJ, 2951 /*49*/ MatSetBlockSize_MPIAIJ, 2952 0, 2953 0, 2954 0, 2955 0, 2956 /*54*/ MatFDColoringCreate_MPIAIJ, 2957 0, 2958 MatSetUnfactored_MPIAIJ, 2959 MatPermute_MPIAIJ, 2960 0, 2961 /*59*/ MatGetSubMatrix_MPIAIJ, 2962 MatDestroy_MPIAIJ, 2963 MatView_MPIAIJ, 2964 0, 2965 0, 2966 /*64*/ 0, 2967 0, 2968 0, 2969 0, 2970 0, 2971 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2972 MatGetRowMinAbs_MPIAIJ, 2973 0, 2974 MatSetColoring_MPIAIJ, 2975 #if defined(PETSC_HAVE_ADIC) 2976 MatSetValuesAdic_MPIAIJ, 2977 #else 2978 0, 2979 #endif 2980 MatSetValuesAdifor_MPIAIJ, 2981 /*75*/ MatFDColoringApply_AIJ, 2982 0, 2983 0, 2984 0, 2985 0, 2986 /*80*/ 0, 2987 0, 2988 0, 2989 /*83*/ MatLoad_MPIAIJ, 2990 0, 2991 0, 2992 0, 2993 0, 2994 0, 2995 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2996 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2997 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2998 MatPtAP_Basic, 2999 MatPtAPSymbolic_MPIAIJ, 3000 /*94*/ MatPtAPNumeric_MPIAIJ, 3001 0, 3002 0, 3003 0, 3004 0, 3005 /*99*/ 0, 3006 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3007 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3008 MatConjugate_MPIAIJ, 3009 0, 3010 /*104*/MatSetValuesRow_MPIAIJ, 3011 MatRealPart_MPIAIJ, 3012 MatImaginaryPart_MPIAIJ, 3013 0, 3014 0, 3015 /*109*/0, 3016 MatGetRedundantMatrix_MPIAIJ, 3017 MatGetRowMin_MPIAIJ, 3018 0, 3019 0, 3020 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 3021 0, 3022 0, 3023 0, 3024 0, 3025 /*119*/0, 3026 0, 3027 0, 3028 0, 3029 MatGetMultiProcBlock_MPIAIJ, 3030 /*124*/MatFindNonZeroRows_MPIAIJ, 3031 0, 3032 0, 3033 0, 3034 MatGetSubMatricesParallel_MPIAIJ 3035 }; 3036 3037 /* ----------------------------------------------------------------------------------------*/ 3038 3039 EXTERN_C_BEGIN 3040 #undef __FUNCT__ 3041 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3042 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3043 { 3044 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3045 PetscErrorCode ierr; 3046 3047 PetscFunctionBegin; 3048 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3049 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3050 PetscFunctionReturn(0); 3051 } 3052 EXTERN_C_END 3053 3054 EXTERN_C_BEGIN 3055 #undef __FUNCT__ 3056 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3057 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3058 { 3059 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3060 PetscErrorCode ierr; 3061 3062 PetscFunctionBegin; 3063 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3064 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3065 PetscFunctionReturn(0); 3066 } 3067 EXTERN_C_END 3068 3069 EXTERN_C_BEGIN 3070 #undef __FUNCT__ 3071 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3072 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3073 { 3074 Mat_MPIAIJ *b; 3075 PetscErrorCode ierr; 3076 PetscInt i; 3077 3078 
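  /* A minimal usage sketch (hypothetical sizes, for orientation only): a caller
     expecting at most 5 nonzeros per row in the diagonal block and 2 per row in
     the off-diagonal block would preallocate with

       ierr = MatMPIAIJSetPreallocation(B,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

     which matches the defaults substituted below when PETSC_DEFAULT is passed. */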
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
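/*
   For instance, a caller that knows every local row has at most 3 nonzeros in
   the diagonal block and 2 in the off-diagonal block could preallocate with
   (illustrative sketch; B is an MPIAIJ matrix whose sizes are already set):

      ierr = MatMPIAIJSetPreallocation(B,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/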
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->rmap->bs     = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/*
    Allows sending/receiving larger messages than 2 gigabytes in a single call
*/
static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
{
  int             ierr;
  static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
  PetscInt        i,numchunks;
  PetscMPIInt     icnt;

  numchunks = cnt/CHUNKSIZE + 1;
  for (i=0; i<numchunks; i++) {
    icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
    ierr = MPI_Send(mess,icnt,type,to,tag,comm);if (ierr) return ierr;
    if (type == MPIU_INT) {
      mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
    } else if (type == MPIU_SCALAR) {
      mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
    } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
  }
  return 0;
}
static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
{
  int             ierr;
  static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
  MPI_Status      status;
  PetscInt        i,numchunks;
  PetscMPIInt     icnt;

  numchunks = cnt/CHUNKSIZE + 1;
  for (i=0; i<numchunks; i++) {
    icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
    ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);if (ierr) return ierr;
    if (type == MPIU_INT) {
      mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
    } else if (type == MPIU_SCALAR) {
      mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
    } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
  }
  return 0;
}
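/*
   The two routines above must be called in matched pairs with the same count
   and datatype, since sender and receiver derive the identical chunking.  For
   example (illustrative sketch), to ship nz column indices from rank 0 to rank 1:

      if (!rank) {
        ierr = MPILong_Send(cols,nz,MPIU_INT,1,tag,comm);CHKERRQ(ierr);
      } else if (rank == 1) {
        ierr = MPILong_Recv(cols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
      }
*/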
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners,sizesset = 1;
  int            fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
  if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /* determine ownership of all rows */
  if (newMat->rmap->n < 0) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
  else m = newMat->rmap->n;                                  /* Set by user */

  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  if (!sizesset) {
    ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
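/*
   A typical caller of the above (illustrative sketch; the binary file name
   "amat" is hypothetical):

      Mat         A;
      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(comm,"amat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/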
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ of the
  local rows, and then the end result by concatenating the local matrices.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().

    Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1]- Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1]- Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1]- Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are indices into the local j array.

       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
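/*
   For example, process 0 of the 3x3 matrix in the man page above would pass
   its two locally owned rows as (illustrative sketch):

      PetscInt    i[] = {0,1,3};
      PetscInt    j[] = {0,0,2};
      PetscScalar v[] = {1.0,2.0,3.0};
      ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
*/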
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an m x n matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square.  The remaining portion of the local
   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz, o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
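/*
   Using the 8x8 example from the man page above, proc0 could preallocate either
   with the single worst-case values or with per-row arrays (illustrative sketch):

      ierr = MatMPIAIJSetPreallocation(A,2,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
   or
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/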
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are indices into the local j array.

       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
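/*
   For instance, process 0 from the example above could pass its two local rows
   directly (illustrative sketch; process 1 would make the matching call with
   m = 1 and its own arrays):

      PetscInt    i[] = {0,1,3};
      PetscInt    j[] = {0,0,2};
      PetscScalar a[] = {1.0,2.0,3.0};
      Mat         A;
      ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
*/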
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal submatrix on
   process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
   etc.  The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion.  The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.
   This division can be shown as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz, o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence preallocation is perfect.
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
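/*
   The recommended construction sequence mentioned in the man page above,
   spelled out (illustrative sketch; m,n,M,N and the preallocation values are
   whatever the application requires):

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
*/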
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
@*/
PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX) {
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
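/*
   For example (illustrative sketch; seqmat is a SeqAIJ matrix with the same
   number of columns on every process):

      Mat outmat;
      ierr = MatMerge(comm,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);

   Note that MatMerge() destroys inmat, so seqmat must not be used afterwards.
*/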
#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);

    ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
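/*
   The merge metadata lives in a PetscContainer attached to the MPI matrix, so
   other routines can recover it the same way the destructor above does
   (illustrative sketch):

      PetscContainer      container;
      Mat_Merge_SeqsToMPI *merge;
      ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
      if (container) {
        ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
      }
*/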
#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
/*@C
      MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm = ((PetscObject)mpimat)->comm;
  Mat_SeqAIJ          *a   = (Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N = mpimat->cmap->N,i,j,*owners,*ai = a->i,*aj = a->j;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa = a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"mpimat does not carry the Mat_Merge_SeqsToMPI information from the symbolic phase");
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  M      = merge->rowmap->N;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }
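  /*
     Each outgoing i-structure message is laid out as [nrows, nrows row
     indices, nrows+1 row offsets], i.e. 2*(nrows+1) integers, which is what
     len_si[proc] counts above; see the packing loop below.
  */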
  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (local to the receiving process)
         [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++) {
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) {        /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* = max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining < bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
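/*
   A minimal usage sketch (names illustrative; every process of the
   communicator is assumed to hold a fully assembled SeqAIJ matrix of the
   same global dimensions):

     Mat seqmat,mpimat;
     ... assemble seqmat on each process ...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ... use mpimat; MatDestroy(mpimat) also frees the attached merge data ...
*/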
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and N columns, where mlocal is the local row count obtained with MatGetLocalSize() and N is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A (cols < cstart) */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (cols >= cend) */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A (cols < cstart) */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A (cols >= cend) */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
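/*
   A minimal usage sketch (A is assumed to be an assembled MPIAIJ matrix):

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... work with the mlocal-by-N SeqAIJ matrix A_loc ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);  refresh values after A changes
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/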
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
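/*
   Note: with row = col = PETSC_NULL the routine selects all local rows and,
   for the columns, the diagonal-block columns plus the nonzero off-diagonal
   columns of A, in increasing global order (see the construction of iscola
   above).
*/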
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
.    brstart - index of the first row of B_seq that comes from B's local rows
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
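/*
   Note: with MAT_REUSE_MATRIX the index sets obtained from the
   MAT_INITIAL_MATRIX call must be passed back through rowb and colb (an
   error is raised above otherwise); when rowb/colb are supplied, the caller
   retains ownership of the index sets and must eventually destroy them.
*/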
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
    MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.    startsj - starting points in B's sending j- and a-arrays, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
.    startsj_r - similar to startsj, for the receives
-    bufa_ptr - array for sending matrix values, saved for MAT_REUSE_MATRIX (or PETSC_NULL)

   Output Parameter:
.    B_oth - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=((PetscObject)ctx)->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the lengths of receiving rows */
  svalues  = gen_to->values;   /* holds the lengths of sending rows */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;   /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
    sstartsj[0] = 0; rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k   = 0;
    for (i=0; i<nsends; i++) {
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
          rowlen[j*sbs+l] = ncols;
          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj   = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
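/*
   Design note: MatGetBrowsOfAoCols() reuses the communication pattern of
   A's matrix-vector scatter (a->Mvctx) to fetch exactly the rows of B that
   the off-diagonal part of the local A touches; startsj, startsj_r and
   bufa_ptr preserve the message layout so that a MAT_REUSE_MATRIX call only
   re-exchanges the numerical values.
*/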
#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
    MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

    Not Collective

   Input Parameter:
.    A - the matrix in mpiaij format

   Output Parameters:
+    lvec - the local vector holding off-process values from the argument to a matrix-vector product
.    colmap - a map from global column index to local index into lvec
-    multScatter - a scatter from the argument of a matrix-vector product to lvec

    Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A,Vec *lvec,PetscTable *colmap,VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A,Vec *lvec,PetscInt *colmap[],VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(lvec,2);
  PetscValidPointer(colmap,3);
  PetscValidPointer(multScatter,4);
  a = (Mat_MPIAIJ*)A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing A*B directly is untenable

               n                p               p
        (            )   (            )   (            )
      m (     A      ) * n(     B     ) = m(     C     )
        (            )   (            )   (            )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
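/*
   These routines are registered on MPIAIJ matrices in MatCreate_MPIAIJ()
   below and are reached through the generic interface, e.g. (a sketch;
   A is an MPIDENSE matrix and B an MPIAIJ matrix of compatible sizes):

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/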
EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data      = (void*)b;
  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                           "MatGetFactor_aij_mumps",
                                           MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                           "MatGetFactor_mpiaij_pastix",
                                           MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5452 "MatMPIAIJSetPreallocation_MPIAIJ", 5453 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5454 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5455 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5456 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5457 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5458 "MatDiagonalScaleLocal_MPIAIJ", 5459 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5460 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5461 "MatConvert_MPIAIJ_MPIAIJPERM", 5462 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5463 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5464 "MatConvert_MPIAIJ_MPIAIJCRL", 5465 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5466 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5467 "MatConvert_MPIAIJ_MPISBAIJ", 5468 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5469 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5470 "MatMatMult_MPIDense_MPIAIJ", 5471 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5472 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5473 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5474 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5475 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5476 "MatMatMultNumeric_MPIDense_MPIAIJ", 5477 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5478 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5479 PetscFunctionReturn(0); 5480 } 5481 EXTERN_C_END 5482 5483 #undef __FUNCT__ 5484 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5485 /*@ 5486 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5487 and "off-diagonal" part of the matrix in CSR format. 5488 5489 Collective on MPI_Comm 5490 5491 Input Parameters: 5492 + comm - MPI communicator 5493 . m - number of local rows (Cannot be PETSC_DECIDE) 5494 . n - This value should be the same as the local size used in creating the 5495 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5496 calculated if N is given) For square matrices n is almost always m. 5497 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5498 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5499 . i - row indices for "diagonal" portion of matrix 5500 . j - column indices 5501 . a - matrix values 5502 . oi - row indices for "off-diagonal" portion of matrix 5503 . oj - column indices 5504 - oa - matrix values 5505 5506 Output Parameter: 5507 . mat - the matrix 5508 5509 Level: advanced 5510 5511 Notes: 5512 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5513 must free the arrays once the matrix has been destroyed and not before. 5514 5515 The i and j indices are 0 based 5516 5517 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5518 5519 This sets local rows and cannot be used to set off-processor values. 5520 5521 You cannot later use MatSetValues() to change values in this matrix. 

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                              PetscInt oi[],PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*)(*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
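/*
   A minimal sketch for a 2x2 matrix on two processes, one row and one column
   per process (values illustrative; note that j holds LOCAL column indices
   of the diagonal block, while oj holds GLOBAL column indices that are
   compacted during assembly):

     PetscInt    i[2]  = {0,1},  j[1]  = {0};     one diagonal entry
     PetscInt    oi[2] = {0,1},  oj[1];           one off-diagonal entry
     PetscScalar a[1]  = {2.0},  oa[1] = {1.0};
     Mat         mat;
     ... set oj[0] to 1 on rank 0 and to 0 on rank 1 ...
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
*/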
/*
    Special version for direct calls from Fortran
*/
#include <private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  ierr = MatPreallocated(mat);CHKERRQ(ierr);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries) && (addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined (PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else {
        if (!aij->donotstash) {
          if (roworiented) {
            ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          } else {
            ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END