#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
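
/*
   Illustrative usage sketch for the routine above (not part of the library;
   mlocal and the construction of gmat are placeholders): distribute a square
   MATSEQAIJ assembled on process 0, then refresh only its numerical values
   later with MAT_REUSE_MATRIX.

     Mat gmat = PETSC_NULL,dist;
     if (!rank) {
       ... assemble gmat as a square MATSEQAIJ on process 0 only ...
     }
     ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ... change the values (not the nonzero pattern) of gmat on process 0 ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);

   The local row counts mlocal must sum to the global size of gmat.
*/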

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable
  at a slightly higher hash table cost; without it it is not scalable
  (each process has an order-N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
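
/*
   Lookup sketch (illustrative only; gcol and lcol are hypothetical locals):
   translating a global column number gcol into a local column of the
   off-diagonal block B using the colmap built above, exactly as
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() do below. A result of -1
   means gcol does not occur in the off-diagonal part on this process.

   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                        (stored 1-based so that 0 can mean absent)
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/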

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
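
/*
   Note on MatSetValuesRow_MPIAIJ() above (a reading aid, not library
   documentation): v must contain the entire local row with its entries
   ordered by global column number, i.e.

     [ off-diagonal entries left of the diagonal block | diagonal block | off-diagonal entries right ]

   which is exactly the order in which the three PetscMemcpy() calls consume
   it; l counts how many off-diagonal entries lie left of the diagonal block.
*/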

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscTruth ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,ignorezeroentries && (addv == ADD_VALUES));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,ignorezeroentries && (addv == ADD_VALUES));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
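
/*
   Usage sketch (illustrative; grow, gcol and val are hypothetical): rows
   owned by another process are legal in MatSetValues(); the routine above
   stashes such entries and the stash is replayed during assembly below.

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   If MAT_IGNORE_OFF_PROC_ENTRIES has been set, off-process entries are
   silently dropped instead of stashed.
*/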

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}
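
/*
   Worked note on the two Allreduces above (assuming the standard PETSc enum
   values INSERT_VALUES=1 and ADD_VALUES=2): the MPI_BOR reduce in
   MatAssemblyBegin_MPIAIJ() yields 3 = (ADD_VALUES|INSERT_VALUES) exactly
   when some process inserted while another added, which is the mixed-mode
   error checked there. Similarly, the MPI_PROD reduce over was_assembled in
   MatAssemblyEnd_MPIAIJ() is zero as soon as any one process disassembled
   (e.g. flags {1,1,0} give product 0), forcing every process to disassemble
   so that all can reassemble consistently.
*/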

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
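
/*
   The multiply above computes, with x_loc the owned part of xx and x_ghost
   the off-process entries gathered into a->lvec,

     yy = A_loc * x_loc + B * x_ghost

   and overlaps communication with computation: the forward scatter filling
   a->lvec is in flight while the diagonal-block product A_loc * x_loc runs.
*/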

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
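
/*
   The transpose products above run the MatMult_MPIAIJ() pattern in reverse:

     yy = A_loc^T * x_loc,   then   yy += scatter_reverse_add( B^T * x_loc )

   The merged-scatter branch merely reorders Begin/End around the local
   product; VecScatterEnd() must still be called to formally close the
   reverse scatter even when the values already arrived in the Begin().
*/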

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me, Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)  {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
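
/*
   Resulting file layout written above (a sketch of the standard PETSc binary
   matrix format, handy when post-processing such a file by hand):

     PetscInt    header[4]       = {MAT_FILE_COOKIE, M, N, total nonzeros}
     PetscInt    rowlengths[M]     nonzeros per row, all rows in order
     PetscInt    colindices[nz]    global column indices, row by row, sorted within a row
     PetscScalar values[nz]        numerical values matching colindices
*/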

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
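
/*
   Illustrative usage sketch (file name hypothetical): any of the four
   supported viewer types may be passed to MatView(); e.g. the binary path
   above is reached via

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(((PetscObject)mat)->comm,"mat.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/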

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscTruth     hasop;

  PetscFunctionBegin;
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->relax)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecWAXPY(bb1,-1.0,bb1,bb);CHKERRQ(ierr);
    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
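
/*
   The local sweeps above implement process-block Jacobi with SOR inside each
   block: writing x = [x_own; x_ghost], every outer iteration refreshes the
   ghost values and then relaxes the local system

     A_loc * x_own = bb1 = bb - B * x_ghost

   which is why mat->lvec is scaled by -1 and fed through a multadd before
   each local relaxation.
*/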

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
= PETSC_TRUE; 1470 1471 if (!mat->rowvalues && (idx || v)) { 1472 /* 1473 allocate enough space to hold information from the longest row. 1474 */ 1475 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1476 PetscInt max = 1,tmp; 1477 for (i=0; i<matin->rmap->n; i++) { 1478 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1479 if (max < tmp) { max = tmp; } 1480 } 1481 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1482 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1483 } 1484 1485 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1486 lrow = row - rstart; 1487 1488 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1489 if (!v) {pvA = 0; pvB = 0;} 1490 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1491 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1492 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1493 nztot = nzA + nzB; 1494 1495 cmap = mat->garray; 1496 if (v || idx) { 1497 if (nztot) { 1498 /* Sort by increasing column numbers, assuming A and B already sorted */ 1499 PetscInt imark = -1; 1500 if (v) { 1501 *v = v_p = mat->rowvalues; 1502 for (i=0; i<nzB; i++) { 1503 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1504 else break; 1505 } 1506 imark = i; 1507 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1508 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1509 } 1510 if (idx) { 1511 *idx = idx_p = mat->rowindices; 1512 if (imark > -1) { 1513 for (i=0; i<imark; i++) { 1514 idx_p[i] = cmap[cworkB[i]]; 1515 } 1516 } else { 1517 for (i=0; i<nzB; i++) { 1518 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1519 else break; 1520 } 1521 imark = i; 1522 } 1523 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1524 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1525 } 1526 } else { 1527 if (idx) *idx = 0; 1528 if (v) *v = 0; 1529 } 1530 } 1531 *nz = nztot; 1532 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1533 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1534 PetscFunctionReturn(0); 1535 } 1536 1537 #undef __FUNCT__ 1538 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1539 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1540 { 1541 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1542 1543 PetscFunctionBegin; 1544 if (!aij->getrowactive) { 1545 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1546 } 1547 aij->getrowactive = PETSC_FALSE; 1548 PetscFunctionReturn(0); 1549 } 1550 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "MatNorm_MPIAIJ" 1553 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1554 { 1555 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1556 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1557 PetscErrorCode ierr; 1558 PetscInt i,j,cstart = mat->cmap->rstart; 1559 PetscReal sum = 0.0; 1560 MatScalar *v; 1561 1562 PetscFunctionBegin; 1563 if (aij->size == 1) { 1564 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1565 } else { 1566 if (type == NORM_FROBENIUS) { 1567 v = amat->a; 1568 for (i=0; i<amat->nz; i++) { 1569 #if defined(PETSC_USE_COMPLEX) 1570 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1571 #else 1572 sum += (*v)*(*v); v++; 1573 #endif 1574 } 1575 v = bmat->a; 1576 for (i=0; i<bmat->nz; i++) { 1577 #if defined(PETSC_USE_COMPLEX) 1578 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1579 #else 1580 sum += 
(*v)*(*v); v++; 1581 #endif 1582 } 1583 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1584 *norm = sqrt(*norm); 1585 } else if (type == NORM_1) { /* max column norm */ 1586 PetscReal *tmp,*tmp2; 1587 PetscInt *jj,*garray = aij->garray; 1588 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1589 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1590 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1591 *norm = 0.0; 1592 v = amat->a; jj = amat->j; 1593 for (j=0; j<amat->nz; j++) { 1594 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1595 } 1596 v = bmat->a; jj = bmat->j; 1597 for (j=0; j<bmat->nz; j++) { 1598 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1599 } 1600 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1601 for (j=0; j<mat->cmap->N; j++) { 1602 if (tmp2[j] > *norm) *norm = tmp2[j]; 1603 } 1604 ierr = PetscFree(tmp);CHKERRQ(ierr); 1605 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1606 } else if (type == NORM_INFINITY) { /* max row norm */ 1607 PetscReal ntemp = 0.0; 1608 for (j=0; j<aij->A->rmap->n; j++) { 1609 v = amat->a + amat->i[j]; 1610 sum = 0.0; 1611 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1612 sum += PetscAbsScalar(*v); v++; 1613 } 1614 v = bmat->a + bmat->i[j]; 1615 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1616 sum += PetscAbsScalar(*v); v++; 1617 } 1618 if (sum > ntemp) ntemp = sum; 1619 } 1620 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1621 } else { 1622 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1623 } 1624 } 1625 PetscFunctionReturn(0); 1626 } 1627 1628 #undef __FUNCT__ 1629 #define __FUNCT__ "MatTranspose_MPIAIJ" 1630 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1631 { 1632 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1633 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1634 PetscErrorCode ierr; 1635 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1636 PetscInt cstart=A->cmap->rstart,ncol; 1637 Mat B; 1638 MatScalar *array; 1639 1640 PetscFunctionBegin; 1641 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1642 1643 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1644 ai = Aloc->i; aj = Aloc->j; 1645 bi = Bloc->i; bj = Bloc->j; 1646 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1647 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1648 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1649 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1650 for (i=0; i<ai[ma]; i++){ 1651 d_nnz[aj[i]] ++; 1652 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1653 } 1654 1655 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1656 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1657 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1658 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1659 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1660 } else { 1661 B = *matout; 1662 } 1663 1664 /* copy over the A part */ 1665 array = Aloc->a; 1666 row = A->rmap->rstart; 1667 for (i=0; i<ma; i++) { 1668 ncol = ai[i+1]-ai[i]; 1669 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1670 row++; array += ncol; aj += ncol; 1671 } 1672 aj = 
Aloc->j; 1673 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1674 1675 /* copy over the B part */ 1676 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1677 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1678 array = Bloc->a; 1679 row = A->rmap->rstart; 1680 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1681 cols_tmp = cols; 1682 for (i=0; i<mb; i++) { 1683 ncol = bi[i+1]-bi[i]; 1684 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1685 row++; array += ncol; cols_tmp += ncol; 1686 } 1687 ierr = PetscFree(cols);CHKERRQ(ierr); 1688 1689 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1690 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1691 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1692 *matout = B; 1693 } else { 1694 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1695 } 1696 PetscFunctionReturn(0); 1697 } 1698 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1701 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1702 { 1703 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1704 Mat a = aij->A,b = aij->B; 1705 PetscErrorCode ierr; 1706 PetscInt s1,s2,s3; 1707 1708 PetscFunctionBegin; 1709 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1710 if (rr) { 1711 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1712 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1713 /* Overlap communication with computation. */ 1714 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1715 } 1716 if (ll) { 1717 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1718 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1719 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1720 } 1721 /* scale the diagonal block */ 1722 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1723 1724 if (rr) { 1725 /* Do a scatter end and then right scale the off-diagonal block */ 1726 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1727 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1728 } 1729 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1735 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1736 { 1737 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1738 PetscErrorCode ierr; 1739 1740 PetscFunctionBegin; 1741 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1742 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1743 PetscFunctionReturn(0); 1744 } 1745 #undef __FUNCT__ 1746 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1747 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1748 { 1749 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1750 PetscErrorCode ierr; 1751 1752 PetscFunctionBegin; 1753 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1754 PetscFunctionReturn(0); 1755 } 1756 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "MatEqual_MPIAIJ" 1759 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1760 { 1761 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1762 Mat a,b,c,d; 1763 PetscTruth flg; 1764 PetscErrorCode ierr; 1765 1766 PetscFunctionBegin; 1767 a = matA->A; b = matA->B; 1768 c = matB->A; d = matB->B; 1769 1770 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1771 if (flg) { 1772 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1773 } 1774 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1775 PetscFunctionReturn(0); 1776 } 
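/*
   Usage sketch (illustrative; not part of the original source): MatEqual_MPIAIJ
   above is reached through the generic MatEqual() interface and is collective,
   so the MPI_Allreduce with MPI_LAND guarantees every process receives the same
   answer even when only one process sees a local difference. Assuming two
   assembled MPIAIJ matrices A and B on the same communicator:

       PetscTruth eq;
       ierr = MatEqual(A,B,&eq);CHKERRQ(ierr);
       if (!eq) SETERRQ(PETSC_ERR_PLIB,"matrices are not equal");
*/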
1777 1778 #undef __FUNCT__ 1779 #define __FUNCT__ "MatCopy_MPIAIJ" 1780 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1781 { 1782 PetscErrorCode ierr; 1783 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1784 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1785 1786 PetscFunctionBegin; 1787 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1788 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1789 /* because of the column compression in the off-processor part of the matrix a->B, 1790 the number of columns in a->B and b->B may be different, hence we cannot call 1791 the MatCopy() directly on the two parts. If need be, we can provide a more 1792 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1793 then copying the submatrices */ 1794 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1795 } else { 1796 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1797 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1798 } 1799 PetscFunctionReturn(0); 1800 } 1801 1802 #undef __FUNCT__ 1803 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1804 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1805 { 1806 PetscErrorCode ierr; 1807 1808 PetscFunctionBegin; 1809 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1810 PetscFunctionReturn(0); 1811 } 1812 1813 #include "petscblaslapack.h" 1814 #undef __FUNCT__ 1815 #define __FUNCT__ "MatAXPY_MPIAIJ" 1816 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1817 { 1818 PetscErrorCode ierr; 1819 PetscInt i; 1820 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1821 PetscBLASInt bnz,one=1; 1822 Mat_SeqAIJ *x,*y; 1823 1824 PetscFunctionBegin; 1825 if (str == SAME_NONZERO_PATTERN) { 1826 PetscScalar alpha = a; 1827 x = (Mat_SeqAIJ *)xx->A->data; 1828 y = (Mat_SeqAIJ *)yy->A->data; 1829 bnz = PetscBLASIntCast(x->nz); 1830 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1831 x = (Mat_SeqAIJ *)xx->B->data; 1832 y = (Mat_SeqAIJ *)yy->B->data; 1833 bnz = PetscBLASIntCast(x->nz); 1834 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1835 } else if (str == SUBSET_NONZERO_PATTERN) { 1836 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1837 1838 x = (Mat_SeqAIJ *)xx->B->data; 1839 y = (Mat_SeqAIJ *)yy->B->data; 1840 if (y->xtoy && y->XtoY != xx->B) { 1841 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1842 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1843 } 1844 if (!y->xtoy) { /* get xtoy */ 1845 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1846 y->XtoY = xx->B; 1847 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1848 } 1849 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1850 } else { 1851 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1852 } 1853 PetscFunctionReturn(0); 1854 } 1855 1856 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1857 1858 #undef __FUNCT__ 1859 #define __FUNCT__ "MatConjugate_MPIAIJ" 1860 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1861 { 1862 #if defined(PETSC_USE_COMPLEX) 1863 PetscErrorCode ierr; 1864 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1865 1866 PetscFunctionBegin; 1867 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1868 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1869 #else 1870 PetscFunctionBegin; 1871 #endif 1872 PetscFunctionReturn(0); 1873 } 1874 1875 #undef __FUNCT__ 1876 #define __FUNCT__ "MatRealPart_MPIAIJ" 1877 PetscErrorCode 
MatRealPart_MPIAIJ(Mat A)
1878 {
1879 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1880 PetscErrorCode ierr;
1881
1882 PetscFunctionBegin;
1883 ierr = MatRealPart(a->A);CHKERRQ(ierr);
1884 ierr = MatRealPart(a->B);CHKERRQ(ierr);
1885 PetscFunctionReturn(0);
1886 }
1887
1888 #undef __FUNCT__
1889 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1890 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1891 {
1892 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1893 PetscErrorCode ierr;
1894
1895 PetscFunctionBegin;
1896 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1897 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1898 PetscFunctionReturn(0);
1899 }
1900
1901 #ifdef PETSC_HAVE_PBGL
1902
1903 #include <boost/parallel/mpi/bsp_process_group.hpp>
1904 #include <boost/graph/distributed/ilu_default_graph.hpp>
1905 #include <boost/graph/distributed/ilu_0_block.hpp>
1906 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1907 #include <boost/graph/distributed/petsc/interface.hpp>
1908 #include <boost/multi_array.hpp>
1909 #include <boost/parallel/distributed_property_map.hpp>
1910
1911 #undef __FUNCT__
1912 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1913 /*
1914 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1915 */
1916 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1917 {
1918 namespace petsc = boost::distributed::petsc;
1919
1920 namespace graph_dist = boost::graph::distributed;
1921 using boost::graph::distributed::ilu_default::process_group_type;
1922 using boost::graph::ilu_permuted;
1923
1924 PetscTruth row_identity, col_identity;
1925 PetscContainer c;
1926 PetscInt m, n, M, N;
1927 PetscErrorCode ierr;
1928
1929 PetscFunctionBegin;
1930 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1931 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1932 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1933 if (!row_identity || !col_identity) {
1934 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1935 }
1936
1937 process_group_type pg;
1938 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
1939 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1940 lgraph_type& level_graph = *lgraph_p;
1941 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
1942
1943 petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1944 ilu_permuted(level_graph);
1945
1946 /* put together the new matrix */
1947 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1948 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1949 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1950 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1951 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1952 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1953 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1954
1955 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1956 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1957 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1958 PetscFunctionReturn(0);
1959 }
1960
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1963 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1964 {
1965 PetscFunctionBegin;
1966 PetscFunctionReturn(0);
1967 }
1968
1969 #undef __FUNCT__
1970 #define __FUNCT__ "MatSolve_MPIAIJ"
1971 /*
1972 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1973 */
1974 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1975 {
1976 namespace graph_dist = boost::graph::distributed;
1977
1978 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
1979 lgraph_type* lgraph_p;
1980 PetscContainer c;
1981 PetscErrorCode ierr;
1982
1983 PetscFunctionBegin;
1984 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1985 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1986 ierr = VecCopy(b, x);CHKERRQ(ierr);
1987
1988 PetscScalar* array_x;
1989 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1990 PetscInt sx;
1991 ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1992
1993 PetscScalar* array_b;
1994 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1995 PetscInt sb;
1996 ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1997
1998 lgraph_type& level_graph = *lgraph_p;
1999 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2000
2001 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2002 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
2003 ref_x(array_x, boost::extents[num_vertices(graph)]);
2004
2005 typedef boost::iterator_property_map<array_ref_type::iterator,
2006 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2007 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2008 vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2009
2010 ilu_set_solve(*lgraph_p, vector_b, vector_x);
2011
2012 PetscFunctionReturn(0);
2013 }
2014 #endif
2015
2016 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2017 PetscInt nzlocal,nsends,nrecvs;
2018 PetscMPIInt *send_rank;
2019 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j;
2020 PetscScalar *sbuf_a,**rbuf_a;
2021 PetscErrorCode (*MatDestroy)(Mat);
2022 } Mat_Redundant;
2023
2024 #undef __FUNCT__
2025 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2026 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2027 {
2028 PetscErrorCode ierr;
2029 Mat_Redundant *redund=(Mat_Redundant*)ptr;
2030 PetscInt i;
2031
2032 PetscFunctionBegin;
2033 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
2034 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2035 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2036 for (i=0; i<redund->nrecvs; i++){
2037 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2038 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2039 }
2040 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2041 ierr = PetscFree(redund);CHKERRQ(ierr);
2042 PetscFunctionReturn(0);
2043 }
2044
2045 #undef __FUNCT__
2046 #define __FUNCT__ "MatDestroy_MatRedundant"
2047 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2048 {
2049 PetscErrorCode ierr;
2050 PetscContainer container;
2051 Mat_Redundant *redund=PETSC_NULL;
2052
2053 PetscFunctionBegin;
2054 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2055 if (container) {
2056 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2057 } else {
2058 SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2059 }
2060 A->ops->destroy = redund->MatDestroy;
2061 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2062 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2063 ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2064 PetscFunctionReturn(0);
2065 }
2066
2067 #undef __FUNCT__
2068 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2069 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2070 {
2071 PetscMPIInt rank,size;
2072 MPI_Comm comm=((PetscObject)mat)->comm;
2073 PetscErrorCode ierr;
2074 PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
2075 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2076 PetscInt *rowrange=mat->rmap->range;
2077 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2078 Mat A=aij->A,B=aij->B,C=*matredundant;
2079 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2080 PetscScalar *sbuf_a;
2081 PetscInt nzlocal=a->nz+b->nz;
2082 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2083 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2084 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2085 MatScalar *aworkA,*aworkB;
2086 PetscScalar *vals;
2087 PetscMPIInt tag1,tag2,tag3,imdex;
2088 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2089 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2090 MPI_Status recv_status,*send_status;
2091 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2092 PetscInt **rbuf_j=PETSC_NULL;
2093 PetscScalar **rbuf_a=PETSC_NULL;
2094 Mat_Redundant *redund=PETSC_NULL;
2095 PetscContainer container;
2096
2097 PetscFunctionBegin;
2098 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2099 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2100
2101 if (reuse == MAT_REUSE_MATRIX) {
2102 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2103 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2104 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2105 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2106 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2107 if (container) {
2108 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2109 } else {
2110 SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2111 }
2112 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 2113 2114 nsends = redund->nsends; 2115 nrecvs = redund->nrecvs; 2116 send_rank = redund->send_rank; recv_rank = send_rank + size; 2117 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2118 sbuf_j = redund->sbuf_j; 2119 sbuf_a = redund->sbuf_a; 2120 rbuf_j = redund->rbuf_j; 2121 rbuf_a = redund->rbuf_a; 2122 } 2123 2124 if (reuse == MAT_INITIAL_MATRIX){ 2125 PetscMPIInt subrank,subsize; 2126 PetscInt nleftover,np_subcomm; 2127 /* get the destination processors' id send_rank, nsends and nrecvs */ 2128 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2129 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2130 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2131 recv_rank = send_rank + size; 2132 np_subcomm = size/nsubcomm; 2133 nleftover = size - nsubcomm*np_subcomm; 2134 nsends = 0; nrecvs = 0; 2135 for (i=0; i<size; i++){ /* i=rank*/ 2136 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2137 send_rank[nsends] = i; nsends++; 2138 recv_rank[nrecvs++] = i; 2139 } 2140 } 2141 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2142 i = size-nleftover-1; 2143 j = 0; 2144 while (j < nsubcomm - nleftover){ 2145 send_rank[nsends++] = i; 2146 i--; j++; 2147 } 2148 } 2149 2150 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2151 for (i=0; i<nleftover; i++){ 2152 recv_rank[nrecvs++] = size-nleftover+i; 2153 } 2154 } 2155 2156 /* allocate sbuf_j, sbuf_a */ 2157 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2158 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2159 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2160 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2161 2162 /* copy mat's local entries into the buffers */ 2163 if (reuse == MAT_INITIAL_MATRIX){ 2164 rownz_max = 0; 2165 rptr = sbuf_j; 2166 cols = sbuf_j + rend-rstart + 1; 2167 vals = sbuf_a; 2168 rptr[0] = 0; 2169 for (i=0; i<rend-rstart; i++){ 2170 row = i + rstart; 2171 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2172 ncols = nzA + nzB; 2173 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2174 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2175 /* load the column indices for this row into cols */ 2176 lwrite = 0; 2177 for (l=0; l<nzB; l++) { 2178 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2179 vals[lwrite] = aworkB[l]; 2180 cols[lwrite++] = ctmp; 2181 } 2182 } 2183 for (l=0; l<nzA; l++){ 2184 vals[lwrite] = aworkA[l]; 2185 cols[lwrite++] = cstart + cworkA[l]; 2186 } 2187 for (l=0; l<nzB; l++) { 2188 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2189 vals[lwrite] = aworkB[l]; 2190 cols[lwrite++] = ctmp; 2191 } 2192 } 2193 vals += ncols; 2194 cols += ncols; 2195 rptr[i+1] = rptr[i] + ncols; 2196 if (rownz_max < ncols) rownz_max = ncols; 2197 } 2198 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2199 } else { /* only copy matrix values into sbuf_a */ 2200 rptr = sbuf_j; 2201 vals = sbuf_a; 2202 rptr[0] = 0; 2203 for (i=0; i<rend-rstart; i++){ 2204 row = i + rstart; 2205 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2206 ncols = nzA + nzB; 2207 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2208 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2209 lwrite = 0; 2210 for (l=0; l<nzB; l++) { 2211 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2212 } 2213 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2214 for (l=0; l<nzB; l++) { 
2215 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2216 } 2217 vals += ncols; 2218 rptr[i+1] = rptr[i] + ncols; 2219 } 2220 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2221 2222 /* send nzlocal to others, and recv other's nzlocal */ 2223 /*--------------------------------------------------*/ 2224 if (reuse == MAT_INITIAL_MATRIX){ 2225 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2226 s_waits2 = s_waits3 + nsends; 2227 s_waits1 = s_waits2 + nsends; 2228 r_waits1 = s_waits1 + nsends; 2229 r_waits2 = r_waits1 + nrecvs; 2230 r_waits3 = r_waits2 + nrecvs; 2231 } else { 2232 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2233 r_waits3 = s_waits3 + nsends; 2234 } 2235 2236 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2237 if (reuse == MAT_INITIAL_MATRIX){ 2238 /* get new tags to keep the communication clean */ 2239 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2240 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2241 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2242 rbuf_nz = sbuf_nz + nsends; 2243 2244 /* post receives of other's nzlocal */ 2245 for (i=0; i<nrecvs; i++){ 2246 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2247 } 2248 /* send nzlocal to others */ 2249 for (i=0; i<nsends; i++){ 2250 sbuf_nz[i] = nzlocal; 2251 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2252 } 2253 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2254 count = nrecvs; 2255 while (count) { 2256 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2257 recv_rank[imdex] = recv_status.MPI_SOURCE; 2258 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2259 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2260 2261 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2262 rbuf_nz[imdex] += i + 2; 2263 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2264 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2265 count--; 2266 } 2267 /* wait on sends of nzlocal */ 2268 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2269 /* send mat->i,j to others, and recv from other's */ 2270 /*------------------------------------------------*/ 2271 for (i=0; i<nsends; i++){ 2272 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2273 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2274 } 2275 /* wait on receives of mat->i,j */ 2276 /*------------------------------*/ 2277 count = nrecvs; 2278 while (count) { 2279 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2280 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2281 count--; 2282 } 2283 /* wait on sends of mat->i,j */ 2284 /*---------------------------*/ 2285 if (nsends) { 2286 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2287 } 2288 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2289 2290 /* post receives, send and receive mat->a */ 2291 /*----------------------------------------*/ 
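/*
   The value exchange below reuses the nonblocking pattern of the index exchange
   above: post every MPI_Irecv first, then the MPI_Isend's, then drain the
   receives with MPI_Waitany so they are handled in arrival order, and finally
   wait on the sends. Generic shape of the pattern (illustrative sketch with
   hypothetical buffer names, not code from this file):

       for (i=0; i<nrecvs; i++) {ierr = MPI_Irecv(rbuf[i],rlen[i],MPIU_SCALAR,src[i],tag,comm,r_waits+i);CHKERRQ(ierr);}
       for (i=0; i<nsends; i++) {ierr = MPI_Isend(sbuf,slen,MPIU_SCALAR,dest[i],tag,comm,s_waits+i);CHKERRQ(ierr);}
       count = nrecvs;
       while (count--) {ierr = MPI_Waitany(nrecvs,r_waits,&which,&status);CHKERRQ(ierr);} /* rbuf[which] is now valid */
       if (nsends) {ierr = MPI_Waitall(nsends,s_waits,send_status);CHKERRQ(ierr);}
*/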
2292 for (imdex=0; imdex<nrecvs; imdex++) { 2293 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2294 } 2295 for (i=0; i<nsends; i++){ 2296 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2297 } 2298 count = nrecvs; 2299 while (count) { 2300 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2301 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2302 count--; 2303 } 2304 if (nsends) { 2305 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2306 } 2307 2308 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2309 2310 /* create redundant matrix */ 2311 /*-------------------------*/ 2312 if (reuse == MAT_INITIAL_MATRIX){ 2313 /* compute rownz_max for preallocation */ 2314 for (imdex=0; imdex<nrecvs; imdex++){ 2315 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2316 rptr = rbuf_j[imdex]; 2317 for (i=0; i<j; i++){ 2318 ncols = rptr[i+1] - rptr[i]; 2319 if (rownz_max < ncols) rownz_max = ncols; 2320 } 2321 } 2322 2323 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2324 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2325 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2326 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2327 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2328 } else { 2329 C = *matredundant; 2330 } 2331 2332 /* insert local matrix entries */ 2333 rptr = sbuf_j; 2334 cols = sbuf_j + rend-rstart + 1; 2335 vals = sbuf_a; 2336 for (i=0; i<rend-rstart; i++){ 2337 row = i + rstart; 2338 ncols = rptr[i+1] - rptr[i]; 2339 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2340 vals += ncols; 2341 cols += ncols; 2342 } 2343 /* insert received matrix entries */ 2344 for (imdex=0; imdex<nrecvs; imdex++){ 2345 rstart = rowrange[recv_rank[imdex]]; 2346 rend = rowrange[recv_rank[imdex]+1]; 2347 rptr = rbuf_j[imdex]; 2348 cols = rbuf_j[imdex] + rend-rstart + 1; 2349 vals = rbuf_a[imdex]; 2350 for (i=0; i<rend-rstart; i++){ 2351 row = i + rstart; 2352 ncols = rptr[i+1] - rptr[i]; 2353 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2354 vals += ncols; 2355 cols += ncols; 2356 } 2357 } 2358 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2359 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2360 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2361 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2362 if (reuse == MAT_INITIAL_MATRIX){ 2363 PetscContainer container; 2364 *matredundant = C; 2365 /* create a supporting struct and attach it to C for reuse */ 2366 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2367 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2368 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2369 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2370 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2371 2372 redund->nzlocal = nzlocal; 2373 redund->nsends = nsends; 2374 redund->nrecvs = nrecvs; 2375 redund->send_rank = send_rank; 2376 redund->sbuf_nz = sbuf_nz; 2377 redund->sbuf_j = sbuf_j; 2378 redund->sbuf_a = sbuf_a; 2379 redund->rbuf_j = 
rbuf_j;
2380 redund->rbuf_a = rbuf_a;
2381
2382 redund->MatDestroy = C->ops->destroy;
2383 C->ops->destroy = MatDestroy_MatRedundant;
2384 }
2385 PetscFunctionReturn(0);
2386 }
2387
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2390 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2391 {
2392 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2393 PetscErrorCode ierr;
2394 PetscInt i,*idxb = 0;
2395 PetscScalar *va,*vb;
2396 Vec vtmp;
2397
2398 PetscFunctionBegin;
2399 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2400 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2401 if (idx) {
2402 for (i=0; i<A->rmap->n; i++) {
2403 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2404 }
2405 }
2406
2407 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2408 if (idx) {
2409 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2410 }
2411 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2412 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2413
2414 for (i=0; i<A->rmap->n; i++){
2415 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2416 va[i] = vb[i];
2417 if (idx) idx[i] = a->garray[idxb[i]];
2418 }
2419 }
2420
2421 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2422 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2423 if (idxb) {
2424 ierr = PetscFree(idxb);CHKERRQ(ierr);
2425 }
2426 ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2427 PetscFunctionReturn(0);
2428 }
2429
2430 #undef __FUNCT__
2431 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2432 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2433 {
2434 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2435 PetscErrorCode ierr;
2436 PetscInt i,*idxb = 0;
2437 PetscScalar *va,*vb;
2438 Vec vtmp;
2439
2440 PetscFunctionBegin;
2441 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2442 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2443 if (idx) {
2444 for (i=0; i<A->rmap->n; i++) { /* loop over the local rows, hence rmap->n, not cmap->n */
2445 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2446 }
2447 }
2448
2449 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2450 if (idx) {
2451 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2452 }
2453 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2454 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2455
2456 for (i=0; i<A->rmap->n; i++){
2457 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2458 va[i] = vb[i];
2459 if (idx) idx[i] = a->garray[idxb[i]];
2460 }
2461 }
2462
2463 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2464 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2465 if (idxb) {
2466 ierr = PetscFree(idxb);CHKERRQ(ierr);
2467 }
2468 ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2469 PetscFunctionReturn(0);
2470 }
2471
2472 #undef __FUNCT__
2473 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2474 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2475 {
2476 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2477 PetscInt n = A->rmap->n;
2478 PetscInt cstart = A->cmap->rstart;
2479 PetscInt *cmap = mat->garray;
2480 PetscInt *diagIdx, *offdiagIdx;
2481 Vec diagV, offdiagV;
2482 PetscScalar *a, *diagA, *offdiagA;
2483 PetscInt r;
2484 PetscErrorCode ierr;
2485
2486 PetscFunctionBegin;
2487 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2488 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); /* work vectors are purely local, so they live on PETSC_COMM_SELF */
2489 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2490 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2491 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2492
ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2493 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2494 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2495 for(r = 0; r < n; ++r) { 2496 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2497 a[r] = diagA[r]; 2498 idx[r] = cstart + diagIdx[r]; 2499 } else { 2500 a[r] = offdiagA[r]; 2501 idx[r] = cmap[offdiagIdx[r]]; 2502 } 2503 } 2504 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2505 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2506 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2507 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2508 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2509 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2510 PetscFunctionReturn(0); 2511 } 2512 2513 #undef __FUNCT__ 2514 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2515 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2516 { 2517 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2518 PetscInt n = A->rmap->n; 2519 PetscInt cstart = A->cmap->rstart; 2520 PetscInt *cmap = mat->garray; 2521 PetscInt *diagIdx, *offdiagIdx; 2522 Vec diagV, offdiagV; 2523 PetscScalar *a, *diagA, *offdiagA; 2524 PetscInt r; 2525 PetscErrorCode ierr; 2526 2527 PetscFunctionBegin; 2528 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2529 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2530 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2531 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2532 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2533 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2534 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2535 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2536 for(r = 0; r < n; ++r) { 2537 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2538 a[r] = diagA[r]; 2539 idx[r] = cstart + diagIdx[r]; 2540 } else { 2541 a[r] = offdiagA[r]; 2542 idx[r] = cmap[offdiagIdx[r]]; 2543 } 2544 } 2545 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2546 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2547 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2548 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2549 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2550 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2551 PetscFunctionReturn(0); 2552 } 2553 2554 #undef __FUNCT__ 2555 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2556 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[]) 2557 { 2558 PetscErrorCode ierr; 2559 2560 PetscFunctionBegin; 2561 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 2562 PetscFunctionReturn(0); 2563 } 2564 2565 /* -------------------------------------------------------------------*/ 2566 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2567 MatGetRow_MPIAIJ, 2568 MatRestoreRow_MPIAIJ, 2569 MatMult_MPIAIJ, 2570 /* 4*/ MatMultAdd_MPIAIJ, 2571 MatMultTranspose_MPIAIJ, 2572 MatMultTransposeAdd_MPIAIJ, 2573 #ifdef PETSC_HAVE_PBGL 2574 MatSolve_MPIAIJ, 2575 #else 2576 0, 2577 #endif 2578 0, 2579 0, 2580 /*10*/ 0, 2581 0, 2582 0, 2583 MatRelax_MPIAIJ, 2584 MatTranspose_MPIAIJ, 2585 /*15*/ MatGetInfo_MPIAIJ, 2586 MatEqual_MPIAIJ, 2587 MatGetDiagonal_MPIAIJ, 2588 MatDiagonalScale_MPIAIJ, 2589 MatNorm_MPIAIJ, 2590 /*20*/ MatAssemblyBegin_MPIAIJ, 2591 MatAssemblyEnd_MPIAIJ, 2592 MatSetOption_MPIAIJ, 2593 MatZeroEntries_MPIAIJ, 2594 /*24*/ MatZeroRows_MPIAIJ, 2595 0, 2596 #ifdef PETSC_HAVE_PBGL 2597 0, 
2598 #else 2599 0, 2600 #endif 2601 0, 2602 0, 2603 /*29*/ MatSetUpPreallocation_MPIAIJ, 2604 #ifdef PETSC_HAVE_PBGL 2605 0, 2606 #else 2607 0, 2608 #endif 2609 0, 2610 0, 2611 0, 2612 /*34*/ MatDuplicate_MPIAIJ, 2613 0, 2614 0, 2615 0, 2616 0, 2617 /*39*/ MatAXPY_MPIAIJ, 2618 MatGetSubMatrices_MPIAIJ, 2619 MatIncreaseOverlap_MPIAIJ, 2620 MatGetValues_MPIAIJ, 2621 MatCopy_MPIAIJ, 2622 /*44*/ MatGetRowMax_MPIAIJ, 2623 MatScale_MPIAIJ, 2624 0, 2625 0, 2626 0, 2627 /*49*/ MatSetBlockSize_MPIAIJ, 2628 0, 2629 0, 2630 0, 2631 0, 2632 /*54*/ MatFDColoringCreate_MPIAIJ, 2633 0, 2634 MatSetUnfactored_MPIAIJ, 2635 MatPermute_MPIAIJ, 2636 0, 2637 /*59*/ MatGetSubMatrix_MPIAIJ, 2638 MatDestroy_MPIAIJ, 2639 MatView_MPIAIJ, 2640 0, 2641 0, 2642 /*64*/ 0, 2643 0, 2644 0, 2645 0, 2646 0, 2647 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2648 MatGetRowMinAbs_MPIAIJ, 2649 0, 2650 MatSetColoring_MPIAIJ, 2651 #if defined(PETSC_HAVE_ADIC) 2652 MatSetValuesAdic_MPIAIJ, 2653 #else 2654 0, 2655 #endif 2656 MatSetValuesAdifor_MPIAIJ, 2657 /*75*/ 0, 2658 0, 2659 0, 2660 0, 2661 0, 2662 /*80*/ 0, 2663 0, 2664 0, 2665 /*83*/ MatLoad_MPIAIJ, 2666 0, 2667 0, 2668 0, 2669 0, 2670 0, 2671 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2672 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2673 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2674 MatPtAP_Basic, 2675 MatPtAPSymbolic_MPIAIJ, 2676 /*94*/ MatPtAPNumeric_MPIAIJ, 2677 0, 2678 0, 2679 0, 2680 0, 2681 /*99*/ 0, 2682 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2683 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2684 MatConjugate_MPIAIJ, 2685 0, 2686 /*104*/MatSetValuesRow_MPIAIJ, 2687 MatRealPart_MPIAIJ, 2688 MatImaginaryPart_MPIAIJ, 2689 0, 2690 0, 2691 /*109*/0, 2692 MatGetRedundantMatrix_MPIAIJ, 2693 MatGetRowMin_MPIAIJ, 2694 0, 2695 0, 2696 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2697 0, 2698 0, 2699 0, 2700 0, 2701 0 2702 }; 2703 2704 /* ----------------------------------------------------------------------------------------*/ 2705 2706 EXTERN_C_BEGIN 2707 #undef __FUNCT__ 2708 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2709 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2710 { 2711 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2712 PetscErrorCode ierr; 2713 2714 PetscFunctionBegin; 2715 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2716 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2717 PetscFunctionReturn(0); 2718 } 2719 EXTERN_C_END 2720 2721 EXTERN_C_BEGIN 2722 #undef __FUNCT__ 2723 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2724 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2725 { 2726 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2727 PetscErrorCode ierr; 2728 2729 PetscFunctionBegin; 2730 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2731 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2732 PetscFunctionReturn(0); 2733 } 2734 EXTERN_C_END 2735 2736 #include "petscpc.h" 2737 EXTERN_C_BEGIN 2738 #undef __FUNCT__ 2739 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2740 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2741 { 2742 Mat_MPIAIJ *b; 2743 PetscErrorCode ierr; 2744 PetscInt i; 2745 2746 PetscFunctionBegin; 2747 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2748 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2749 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2750 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2751 2752 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 
2753 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2754 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2755 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2756 if (d_nnz) { 2757 for (i=0; i<B->rmap->n; i++) { 2758 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2759 } 2760 } 2761 if (o_nnz) { 2762 for (i=0; i<B->rmap->n; i++) { 2763 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2764 } 2765 } 2766 b = (Mat_MPIAIJ*)B->data; 2767 2768 if (!B->preallocated) { 2769 /* Explicitly create 2 MATSEQAIJ matrices. */ 2770 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2771 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2772 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2773 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2774 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2775 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2776 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2777 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2778 } 2779 2780 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2781 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2782 B->preallocated = PETSC_TRUE; 2783 PetscFunctionReturn(0); 2784 } 2785 EXTERN_C_END 2786 2787 #undef __FUNCT__ 2788 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2789 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2790 { 2791 Mat mat; 2792 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2793 PetscErrorCode ierr; 2794 2795 PetscFunctionBegin; 2796 *newmat = 0; 2797 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2798 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2799 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2800 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2801 a = (Mat_MPIAIJ*)mat->data; 2802 2803 mat->factor = matin->factor; 2804 mat->rmap->bs = matin->rmap->bs; 2805 mat->assembled = PETSC_TRUE; 2806 mat->insertmode = NOT_SET_VALUES; 2807 mat->preallocated = PETSC_TRUE; 2808 2809 a->size = oldmat->size; 2810 a->rank = oldmat->rank; 2811 a->donotstash = oldmat->donotstash; 2812 a->roworiented = oldmat->roworiented; 2813 a->rowindices = 0; 2814 a->rowvalues = 0; 2815 a->getrowactive = PETSC_FALSE; 2816 2817 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2818 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2819 2820 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2821 if (oldmat->colmap) { 2822 #if defined (PETSC_USE_CTABLE) 2823 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2824 #else 2825 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2826 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2827 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2828 #endif 2829 } else a->colmap = 0; 2830 if (oldmat->garray) { 2831 PetscInt len; 2832 len = oldmat->B->cmap->n; 2833 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2834 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2835 if (len) { ierr = 
PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2836 } else a->garray = 0; 2837 2838 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2839 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2840 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2841 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2842 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2843 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2844 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2845 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2846 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2847 *newmat = mat; 2848 PetscFunctionReturn(0); 2849 } 2850 2851 #include "petscsys.h" 2852 2853 #undef __FUNCT__ 2854 #define __FUNCT__ "MatLoad_MPIAIJ" 2855 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2856 { 2857 Mat A; 2858 PetscScalar *vals,*svals; 2859 MPI_Comm comm = ((PetscObject)viewer)->comm; 2860 MPI_Status status; 2861 PetscErrorCode ierr; 2862 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2863 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2864 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2865 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2866 PetscInt cend,cstart,n,*rowners; 2867 int fd; 2868 2869 PetscFunctionBegin; 2870 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2871 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2872 if (!rank) { 2873 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2874 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2875 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2876 } 2877 2878 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2879 M = header[1]; N = header[2]; 2880 /* determine ownership of all rows */ 2881 m = M/size + ((M % size) > rank); 2882 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2883 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2884 2885 /* First process needs enough room for process with most rows */ 2886 if (!rank) { 2887 mmax = rowners[1]; 2888 for (i=2; i<size; i++) { 2889 mmax = PetscMax(mmax,rowners[i]); 2890 } 2891 } else mmax = m; 2892 2893 rowners[0] = 0; 2894 for (i=2; i<=size; i++) { 2895 rowners[i] += rowners[i-1]; 2896 } 2897 rstart = rowners[rank]; 2898 rend = rowners[rank+1]; 2899 2900 /* distribute row lengths to all processors */ 2901 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2902 if (!rank) { 2903 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2904 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2905 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2906 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2907 for (j=0; j<m; j++) { 2908 procsnz[0] += ourlens[j]; 2909 } 2910 for (i=1; i<size; i++) { 2911 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2912 /* calculate the number of nonzeros on each processor */ 2913 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2914 procsnz[i] += rowlengths[j]; 2915 } 2916 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2917 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2918 } 2919 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2920 } else { 2921 mpicnt = 
PetscMPIIntCast(m);CHKERRQ(ierr); 2922 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2923 } 2924 2925 if (!rank) { 2926 /* determine max buffer needed and allocate it */ 2927 maxnz = 0; 2928 for (i=0; i<size; i++) { 2929 maxnz = PetscMax(maxnz,procsnz[i]); 2930 } 2931 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2932 2933 /* read in my part of the matrix column indices */ 2934 nz = procsnz[0]; 2935 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2936 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2937 2938 /* read in every one elses and ship off */ 2939 for (i=1; i<size; i++) { 2940 nz = procsnz[i]; 2941 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2942 mpicnt = PetscMPIIntCast(nz); 2943 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2944 } 2945 ierr = PetscFree(cols);CHKERRQ(ierr); 2946 } else { 2947 /* determine buffer space needed for message */ 2948 nz = 0; 2949 for (i=0; i<m; i++) { 2950 nz += ourlens[i]; 2951 } 2952 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2953 2954 /* receive message of column indices*/ 2955 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2956 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2957 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2958 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2959 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2960 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2961 } 2962 2963 /* determine column ownership if matrix is not square */ 2964 if (N != M) { 2965 n = N/size + ((N % size) > rank); 2966 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2967 cstart = cend - n; 2968 } else { 2969 cstart = rstart; 2970 cend = rend; 2971 n = cend - cstart; 2972 } 2973 2974 /* loop over local rows, determining number of off diagonal entries */ 2975 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2976 jj = 0; 2977 for (i=0; i<m; i++) { 2978 for (j=0; j<ourlens[i]; j++) { 2979 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2980 jj++; 2981 } 2982 } 2983 2984 /* create our matrix */ 2985 for (i=0; i<m; i++) { 2986 ourlens[i] -= offlens[i]; 2987 } 2988 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2989 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2990 ierr = MatSetType(A,type);CHKERRQ(ierr); 2991 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2992 2993 for (i=0; i<m; i++) { 2994 ourlens[i] += offlens[i]; 2995 } 2996 2997 if (!rank) { 2998 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2999 3000 /* read in my part of the matrix numerical values */ 3001 nz = procsnz[0]; 3002 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3003 3004 /* insert into matrix */ 3005 jj = rstart; 3006 smycols = mycols; 3007 svals = vals; 3008 for (i=0; i<m; i++) { 3009 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3010 smycols += ourlens[i]; 3011 svals += ourlens[i]; 3012 jj++; 3013 } 3014 3015 /* read in other processors and ship out */ 3016 for (i=1; i<size; i++) { 3017 nz = procsnz[i]; 3018 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3019 mpicnt = PetscMPIIntCast(nz); 3020 ierr = 
MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3021 } 3022 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3023 } else { 3024 /* receive numeric values */ 3025 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3026 3027 /* receive message of values*/ 3028 mpicnt = PetscMPIIntCast(nz); 3029 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3030 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3031 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 3032 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 3033 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3034 3035 /* insert into matrix */ 3036 jj = rstart; 3037 smycols = mycols; 3038 svals = vals; 3039 for (i=0; i<m; i++) { 3040 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3041 smycols += ourlens[i]; 3042 svals += ourlens[i]; 3043 jj++; 3044 } 3045 } 3046 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3047 ierr = PetscFree(vals);CHKERRQ(ierr); 3048 ierr = PetscFree(mycols);CHKERRQ(ierr); 3049 ierr = PetscFree(rowners);CHKERRQ(ierr); 3050 3051 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3052 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3053 *newmat = A; 3054 PetscFunctionReturn(0); 3055 } 3056 3057 #undef __FUNCT__ 3058 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3059 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3060 { 3061 PetscErrorCode ierr; 3062 IS iscol_local; 3063 PetscInt csize; 3064 3065 PetscFunctionBegin; 3066 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3067 if (call == MAT_REUSE_MATRIX) { 3068 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3069 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3070 } else { 3071 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3072 } 3073 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3074 if (call == MAT_INITIAL_MATRIX) { 3075 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3076 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3077 } 3078 PetscFunctionReturn(0); 3079 } 3080 3081 #undef __FUNCT__ 3082 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3083 /* 3084 Not great since it makes two copies of the submatrix, first an SeqAIJ 3085 in local and then by concatenating the local matrices the end result. 3086 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3087 3088 Note: This requires a sequential iscol with all indices. 
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr   = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr   = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + m;

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
    for (j=1; j<nnz; j++) {
      if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
    }
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    for (j=0; j<nnz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    for (; j<nnz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are indices into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

       The column indices for each row MUST be sorted.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero. See the users manual for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - Creates an MPI AIJ matrix using arrays that contain
   the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are indices into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering,
i.e., for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.
   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0. Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.
   This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the pre-allocation is perfect.
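
   As an illustrative sketch only (assuming the communicator and the proc0
   layout above; the d_nnz/o_nnz arrays are per-process), proc0 could create
   the matrix with:

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};  /* per-row counts on proc0 */
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve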
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr =
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
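
   A minimal usage sketch (hedged; it assumes each process already holds a
   sequential matrix Aloc with an identical number of columns, and note that
   this routine destroys the input matrix):

.vb
     ierr = MatMerge(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Aglobal);CHKERRQ(ierr);
.ve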
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
    if (n == PETSC_DECIDE){
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    ierr    = MPI_Scan(&m,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0;i<m;i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return an MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX){
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A?
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);

    ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
    /* free merge only when the container was found; otherwise it is uninitialized */
    ierr = PetscFree(merge);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "../src/mat/utils/freespace.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
/*@C
      MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed.
     Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm=((PetscObject)mpimat)->comm;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
  } else {
    /* without the container the supporting struct is unavailable; guard against using merge uninitialized */
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat was not created by MatMerge_SeqsToMPISymbolic(), cannot merge into it");
  }
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap.range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap.n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++){
      if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add
received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++){
          if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
  merge->rowmap.n  = m;
  merge->rowmap.N  = M;
  merge->rowmap.bs = 1;
  ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap.n;
  M      = merge->rowmap.N;
  owners = merge->rowmap.range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         +=
len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and
free space for accumulating nonzero column info */
  ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       =
buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@
     MatGetLocalMat - Creates a SeqAIJ matrix from all of the local rows of a parallel matrix

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A, columns left of the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A, columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary.
    */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all of the local rows and the NON-ZERO columns of a parallel matrix

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in MPIAIJ format
.
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4597 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4598 4599 Output Parameter: 4600 + rowb, colb - index sets of rows and columns of B to extract 4601 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4602 - B_seq - the sequential matrix generated 4603 4604 Level: developer 4605 4606 @*/ 4607 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4608 { 4609 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4610 PetscErrorCode ierr; 4611 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4612 IS isrowb,iscolb; 4613 Mat *bseq; 4614 4615 PetscFunctionBegin; 4616 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4617 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4618 } 4619 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4620 4621 if (scall == MAT_INITIAL_MATRIX){ 4622 start = A->cmap->rstart; 4623 cmap = a->garray; 4624 nzA = a->A->cmap->n; 4625 nzB = a->B->cmap->n; 4626 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4627 ncols = 0; 4628 for (i=0; i<nzB; i++) { /* row < local row index */ 4629 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4630 else break; 4631 } 4632 imark = i; 4633 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4634 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4635 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4636 ierr = PetscFree(idx);CHKERRQ(ierr); 4637 *brstart = imark; 4638 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4639 } else { 4640 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4641 isrowb = *rowb; iscolb = *colb; 4642 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4643 bseq[0] = *B_seq; 4644 } 4645 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4646 *B_seq = bseq[0]; 4647 ierr = PetscFree(bseq);CHKERRQ(ierr); 4648 if (!rowb){ 4649 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4650 } else { 4651 *rowb = isrowb; 4652 } 4653 if (!colb){ 4654 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4655 } else { 4656 *colb = iscolb; 4657 } 4658 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4659 PetscFunctionReturn(0); 4660 } 4661 4662 #undef __FUNCT__ 4663 #define __FUNCT__ "MatGetBrowsOfAoCols" 4664 /*@C 4665 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4666 of the OFF-DIAGONAL portion of local A 4667 4668 Collective on Mat 4669 4670 Input Parameters: 4671 + A,B - the matrices in mpiaij format 4672 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4673 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE_MATRIX (or PETSC_NULL) 4674 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE_MATRIX (or PETSC_NULL) 4675 4676 Output Parameter: 4677 . B_oth - the sequential matrix generated 4678 4679 Level: developer 4680 4681 @*/ 4682 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4683 { 4684 VecScatter_MPI_General *gen_to,*gen_from; 4685 PetscErrorCode ierr; 4686 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4687 Mat_SeqAIJ *b_oth; 4688 VecScatter ctx=a->Mvctx; 4689 MPI_Comm comm=((PetscObject)ctx)->comm; 4690 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4691 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4692 PetscScalar *rvalues,*svalues; 4693 MatScalar *b_otha,*bufa,*bufA; 4694 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4695 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4696 MPI_Status *sstatus,rstatus; 4697 PetscMPIInt jj; 4698 PetscInt *cols,sbs,rbs; 4699 PetscScalar *vals; 4700 4701 PetscFunctionBegin; 4702 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4703 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4704 } 4705 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4706 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4707 4708 gen_to = (VecScatter_MPI_General*)ctx->todata; 4709 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4710 rvalues = gen_from->values; /* will hold the lengths of the rows to be received */ 4711 svalues = gen_to->values; /* holds the lengths of the rows to be sent */ 4712 nrecvs = gen_from->n; 4713 nsends = gen_to->n; 4714 4715 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4716 srow = gen_to->indices; /* local row index to be sent */ 4717 sstarts = gen_to->starts; 4718 sprocs = gen_to->procs; 4719 sstatus = gen_to->sstatus; 4720 sbs = gen_to->bs; 4721 rstarts = gen_from->starts; 4722 rprocs = gen_from->procs; 4723 rbs = gen_from->bs; 4724 4725 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4726 if (scall == MAT_INITIAL_MATRIX){ 4727 /* i-array */ 4728 /*---------*/ 4729 /* post receives */ 4730 for (i=0; i<nrecvs; i++){ 4731 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4732 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4733 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4734 } 4735 4736 /* pack the outgoing message */ 4737 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4738 rstartsj = sstartsj + nsends +1; 4739 sstartsj[0] = 0; rstartsj[0] = 0; 4740 len = 0; /* total length of j or a array to be sent */ 4741 k = 0; 4742 for (i=0; i<nsends; i++){ 4743 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4744 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4745 for (j=0; j<nrows; j++) { 4746 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4747 for (l=0; l<sbs; l++){ 4748 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4749 rowlen[j*sbs+l] = ncols; 4750 len += ncols; 4751 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4752 } 4753 k++; 4754 } 4755 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4756
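/* the row lengths for process sprocs[i] were packed into the reused VecScatter send buffer (svalues) and shipped by the MPI_Isend() above */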
sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4757 } 4758 /* wait until recvs and sends of i-array are completed */ 4759 i = nrecvs; 4760 while (i--) { 4761 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4762 } 4763 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4764 4765 /* allocate buffers for sending j and a arrays */ 4766 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4767 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4768 4769 /* create i-array of B_oth */ 4770 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4771 b_othi[0] = 0; 4772 len = 0; /* total length of j or a array to be received */ 4773 k = 0; 4774 for (i=0; i<nrecvs; i++){ 4775 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4776 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4777 for (j=0; j<nrows; j++) { 4778 b_othi[k+1] = b_othi[k] + rowlen[j]; 4779 len += rowlen[j]; k++; 4780 } 4781 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4782 } 4783 4784 /* allocate space for j and a arrays of B_oth */ 4785 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4786 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4787 4788 /* j-array */ 4789 /*---------*/ 4790 /* post receives of j-array */ 4791 for (i=0; i<nrecvs; i++){ 4792 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg to be received */ 4793 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4794 } 4795 4796 /* pack the outgoing message j-array */ 4797 k = 0; 4798 for (i=0; i<nsends; i++){ 4799 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4800 bufJ = bufj+sstartsj[i]; 4801 for (j=0; j<nrows; j++) { 4802 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4803 for (ll=0; ll<sbs; ll++){ 4804 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4805 for (l=0; l<ncols; l++){ 4806 *bufJ++ = cols[l]; 4807 } 4808 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4809 } 4810 } 4811 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4812 } 4813 4814 /* wait until recvs and sends of j-array are completed */ 4815 i = nrecvs; 4816 while (i--) { 4817 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4818 } 4819 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4820 } else if (scall == MAT_REUSE_MATRIX){ 4821 sstartsj = *startsj; 4822 rstartsj = sstartsj + nsends +1; 4823 bufa = *bufa_ptr; 4824 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4825 b_otha = b_oth->a; 4826 } else { 4827 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4828 } 4829 4830 /* a-array */ 4831 /*---------*/ 4832 /* post receives of a-array */ 4833 for (i=0; i<nrecvs; i++){ 4834 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg to be received */ 4835 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4836 } 4837 4838 /* pack the outgoing message a-array */ 4839 k = 0; 4840 for (i=0; i<nsends; i++){ 4841 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4842 bufA = bufa+sstartsj[i]; 4843 for (j=0; j<nrows; j++) { 4844 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4845 for (ll=0; ll<sbs; ll++){ 4846 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4847 for (l=0; l<ncols; l++){
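/* copy this row's values into the outgoing a-array buffer */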
4848 *bufA++ = vals[l]; 4849 } 4850 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4851 } 4852 } 4853 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4854 } 4855 /* wait until recvs and sends of a-array are completed */ 4856 i = nrecvs; 4857 while (i--) { 4858 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4859 } 4860 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4861 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4862 4863 if (scall == MAT_INITIAL_MATRIX){ 4864 /* put together the new matrix */ 4865 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4866 4867 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4868 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4869 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4870 b_oth->free_a = PETSC_TRUE; 4871 b_oth->free_ij = PETSC_TRUE; 4872 b_oth->nonew = 0; 4873 4874 ierr = PetscFree(bufj);CHKERRQ(ierr); 4875 if (!startsj || !bufa_ptr){ 4876 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4877 ierr = PetscFree(bufa);CHKERRQ(ierr); 4878 } else { 4879 *startsj = sstartsj; 4880 *bufa_ptr = bufa; 4881 } 4882 } 4883 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4884 PetscFunctionReturn(0); 4885 } 4886 4887 #undef __FUNCT__ 4888 #define __FUNCT__ "MatGetCommunicationStructs" 4889 /*@C 4890 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4891 4892 Not Collective 4893 4894 Input Parameter: 4895 . A - The matrix in mpiaij format 4896 4897 Output Parameters: 4898 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4899 .
colmap - A map from global column index to local index into lvec 4900 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4901 4902 Level: developer 4903 4904 @*/ 4905 #if defined (PETSC_USE_CTABLE) 4906 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4907 #else 4908 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4909 #endif 4910 { 4911 Mat_MPIAIJ *a; 4912 4913 PetscFunctionBegin; 4914 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4915 PetscValidPointer(lvec, 2); 4916 PetscValidPointer(colmap, 3); 4917 PetscValidPointer(multScatter, 4); 4918 a = (Mat_MPIAIJ *) A->data; 4919 if (lvec) *lvec = a->lvec; 4920 if (colmap) *colmap = a->colmap; 4921 if (multScatter) *multScatter = a->Mvctx; 4922 PetscFunctionReturn(0); 4923 } 4924 4925 EXTERN_C_BEGIN 4926 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4927 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4928 EXTERN_C_END 4929 4930 #include "../src/mat/impls/dense/mpi/mpidense.h" 4931 4932 #undef __FUNCT__ 4933 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4934 /* 4935 Computes (B'*A')' since computing B*A directly is untenable 4936 4937 n p p 4938 ( ) ( ) ( ) 4939 m ( A ) * n ( B ) = m ( C ) 4940 ( ) ( ) ( ) 4941 4942 */ 4943 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4944 { 4945 PetscErrorCode ierr; 4946 Mat At,Bt,Ct; 4947 4948 PetscFunctionBegin; 4949 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4950 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4951 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4952 ierr = MatDestroy(At);CHKERRQ(ierr); 4953 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4954 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4955 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4956 PetscFunctionReturn(0); 4957 } 4958 4959 #undef __FUNCT__ 4960 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4961 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4962 { 4963 PetscErrorCode ierr; 4964 PetscInt m=A->rmap->n,n=B->cmap->n; 4965 Mat Cmat; 4966 4967 PetscFunctionBegin; 4968 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n); 4969 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4970 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4971 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4972 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4973 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4974 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4975 *C = Cmat; 4976 PetscFunctionReturn(0); 4977 } 4978 4979 /* ----------------------------------------------------------------*/ 4980 #undef __FUNCT__ 4981 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4982 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4983 { 4984 PetscErrorCode ierr; 4985 4986 PetscFunctionBegin; 4987 if (scall == MAT_INITIAL_MATRIX){ 4988 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4989 } 4990 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4991 PetscFunctionReturn(0); 4992 } 4993 4994 EXTERN_C_BEGIN 4995 #if defined(PETSC_HAVE_MUMPS) 4996 extern
PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 4997 #endif 4998 #if defined(PETSC_HAVE_PASTIX) 4999 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5000 #endif 5001 #if defined(PETSC_HAVE_SUPERLU_DIST) 5002 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5003 #endif 5004 #if defined(PETSC_HAVE_SPOOLES) 5005 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5006 #endif 5007 EXTERN_C_END 5008 5009 /*MC 5010 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5011 5012 Options Database Keys: 5013 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5014 5015 Level: beginner 5016 5017 .seealso: MatCreateMPIAIJ() 5018 M*/ 5019 5020 EXTERN_C_BEGIN 5021 #undef __FUNCT__ 5022 #define __FUNCT__ "MatCreate_MPIAIJ" 5023 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5024 { 5025 Mat_MPIAIJ *b; 5026 PetscErrorCode ierr; 5027 PetscMPIInt size; 5028 5029 PetscFunctionBegin; 5030 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5031 5032 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5033 B->data = (void*)b; 5034 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5035 B->rmap->bs = 1; 5036 B->assembled = PETSC_FALSE; 5037 B->mapping = 0; 5038 5039 B->insertmode = NOT_SET_VALUES; 5040 b->size = size; 5041 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5042 5043 /* build stash used to cache off-process entries set with MatSetValues() */ 5044 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5045 b->donotstash = PETSC_FALSE; 5046 b->colmap = 0; 5047 b->garray = 0; 5048 b->roworiented = PETSC_TRUE; 5049 5050 /* stuff used for matrix vector multiply */ 5051 b->lvec = PETSC_NULL; 5052 b->Mvctx = PETSC_NULL; 5053 5054 /* stuff for MatGetRow() */ 5055 b->rowindices = 0; 5056 b->rowvalues = 0; 5057 b->getrowactive = PETSC_FALSE; 5058 5059 #if defined(PETSC_HAVE_SPOOLES) 5060 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C", 5061 "MatGetFactor_mpiaij_spooles", 5062 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5063 #endif 5064 #if defined(PETSC_HAVE_MUMPS) 5065 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C", 5066 "MatGetFactor_mpiaij_mumps", 5067 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5068 #endif 5069 #if defined(PETSC_HAVE_PASTIX) 5070 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C", 5071 "MatGetFactor_mpiaij_pastix", 5072 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5073 #endif 5074 #if defined(PETSC_HAVE_SUPERLU_DIST) 5075 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C", 5076 "MatGetFactor_mpiaij_superlu_dist", 5077 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5078 #endif 5079 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5080 "MatStoreValues_MPIAIJ", 5081 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5082 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5083 "MatRetrieveValues_MPIAIJ", 5084 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5085 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5086 "MatGetDiagonalBlock_MPIAIJ", 5087 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5088 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5089 "MatIsTranspose_MPIAIJ", 5090
MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5091 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5092 "MatMPIAIJSetPreallocation_MPIAIJ", 5093 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5094 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5095 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5096 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5097 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5098 "MatDiagonalScaleLocal_MPIAIJ", 5099 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5100 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5101 "MatConvert_MPIAIJ_MPICSRPERM", 5102 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5103 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5104 "MatConvert_MPIAIJ_MPICRL", 5105 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5106 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5107 "MatMatMult_MPIDense_MPIAIJ", 5108 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5109 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5110 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5111 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5112 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5113 "MatMatMultNumeric_MPIDense_MPIAIJ", 5114 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5115 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5116 PetscFunctionReturn(0); 5117 } 5118 EXTERN_C_END 5119 5120 #undef __FUNCT__ 5121 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5122 /*@ 5123 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5124 and "off-diagonal" part of the matrix in CSR format. 5125 5126 Collective on MPI_Comm 5127 5128 Input Parameters: 5129 + comm - MPI communicator 5130 . m - number of local rows (Cannot be PETSC_DECIDE) 5131 . n - This value should be the same as the local size used in creating the 5132 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5133 calculated if N is given) For square matrices n is almost always m. 5134 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5135 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5136 . i - row indices for "diagonal" portion of matrix 5137 . j - column indices 5138 . a - matrix values 5139 . oi - row indices for "off-diagonal" portion of matrix 5140 . oj - column indices 5141 - oa - matrix values 5142 5143 Output Parameter: 5144 . mat - the matrix 5145 5146 Level: advanced 5147 5148 Notes: 5149 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 
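   As the calls to MatCreateSeqAIJWithArrays() in the function body below indicate, the j indices of the "diagonal" portion are local column numbers (0 to n-1), while the oj indices of the "off-diagonal" portion are global column numbers. A minimal sketch with hypothetical values, for a process owning m = 2 rows and n = 2 columns, with one entry in global column 5 owned by another process:

      PetscInt    i[]  = {0,2,3},  j[]  = {0,1,1};
      PetscScalar a[]  = {1.0,2.0,3.0};
      PetscInt    oi[] = {0,1,1},  oj[] = {5};
      PetscScalar oa[] = {4.0};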
5150 5151 The i and j indices are 0 based 5152 5153 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5154 5155 5156 .keywords: matrix, aij, compressed row, sparse, parallel 5157 5158 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5159 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5160 @*/ 5161 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5162 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5163 { 5164 PetscErrorCode ierr; 5165 Mat_MPIAIJ *maij; 5166 5167 PetscFunctionBegin; 5168 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5169 if (i[0]) { 5170 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5171 } 5172 if (oi[0]) { 5173 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5174 } 5175 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5176 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5177 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5178 maij = (Mat_MPIAIJ*) (*mat)->data; 5179 maij->donotstash = PETSC_TRUE; 5180 (*mat)->preallocated = PETSC_TRUE; 5181 5182 ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5183 ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5184 ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr); 5185 ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr); 5186 5187 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5188 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5189 5190 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5191 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5192 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5193 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5194 5195 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5196 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5197 PetscFunctionReturn(0); 5198 } 5199 5200 /* 5201 Special version for direct calls from Fortran 5202 */ 5203 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5204 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5205 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5206 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5207 #endif 5208 5209 /* Change these macros so can be used in void function */ 5210 #undef CHKERRQ 5211 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5212 #undef SETERRQ2 5213 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5214 #undef SETERRQ 5215 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5216 5217 EXTERN_C_BEGIN 5218 #undef __FUNCT__ 5219 #define __FUNCT__ "matsetvaluesmpiaij_" 5220 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5221 { 5222 Mat mat = *mmat; 5223 PetscInt m = *mm, n = *mn; 5224 InsertMode addv = *maddv; 5225 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5226 PetscScalar value; 5227 PetscErrorCode ierr; 5228 5229 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5230 if (mat->insertmode == NOT_SET_VALUES) { 5231 mat->insertmode = addv; 5232 } 5233 #if defined(PETSC_USE_DEBUG) 5234 else if (mat->insertmode != addv) { 5235 
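/* mixing ADD_VALUES and INSERT_VALUES without an intervening MatAssemblyBegin()/MatAssemblyEnd() is an error; this is checked only in debug builds */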
SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5236 } 5237 #endif 5238 { 5239 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5240 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5241 PetscTruth roworiented = aij->roworiented; 5242 5243 /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */ 5244 Mat A = aij->A; 5245 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5246 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5247 MatScalar *aa = a->a; 5248 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5249 Mat B = aij->B; 5250 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5251 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5252 MatScalar *ba = b->a; 5253 5254 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5255 PetscInt nonew = a->nonew; 5256 MatScalar *ap1,*ap2; 5257 5258 PetscFunctionBegin; 5259 for (i=0; i<m; i++) { 5260 if (im[i] < 0) continue; 5261 #if defined(PETSC_USE_DEBUG) 5262 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5263 #endif 5264 if (im[i] >= rstart && im[i] < rend) { 5265 row = im[i] - rstart; 5266 lastcol1 = -1; 5267 rp1 = aj + ai[row]; 5268 ap1 = aa + ai[row]; 5269 rmax1 = aimax[row]; 5270 nrow1 = ailen[row]; 5271 low1 = 0; 5272 high1 = nrow1; 5273 lastcol2 = -1; 5274 rp2 = bj + bi[row]; 5275 ap2 = ba + bi[row]; 5276 rmax2 = bimax[row]; 5277 nrow2 = bilen[row]; 5278 low2 = 0; 5279 high2 = nrow2; 5280 5281 for (j=0; j<n; j++) { 5282 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5283 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5284 if (in[j] >= cstart && in[j] < cend){ 5285 col = in[j] - cstart; 5286 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5287 } else if (in[j] < 0) continue; 5288 #if defined(PETSC_USE_DEBUG) 5289 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5290 #endif 5291 else { 5292 if (mat->was_assembled) { 5293 if (!aij->colmap) { 5294 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5295 } 5296 #if defined (PETSC_USE_CTABLE) 5297 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5298 col--; 5299 #else 5300 col = aij->colmap[in[j]] - 1; 5301 #endif 5302 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5303 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5304 col = in[j]; 5305 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private(); ba must be refreshed before rp2/ap2 are recomputed, since DisAssemble_MPIAIJ() replaced aij->B */ 5306 B = aij->B; 5307 b = (Mat_SeqAIJ*)B->data; 5308 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 5309 rp2 = bj + bi[row]; 5310 ap2 = ba + bi[row]; 5311 rmax2 = bimax[row]; 5312 nrow2 = bilen[row]; 5313 low2 = 0; 5314 high2 = nrow2; 5315 bm = aij->B->rmap->n; 5316 5317 } 5318 } else col = in[j]; 5319 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5320 } 5321 } 5322 } else { 5323 if (!aij->donotstash) { 5324 if (roworiented) { 5325 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,ignorezeroentries && (addv == ADD_VALUES));CHKERRQ(ierr); 5326 } else { 5327 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,ignorezeroentries && (addv == ADD_VALUES));CHKERRQ(ierr); 5328 } 5329 } 5330 } 5331 }} 5332 PetscFunctionReturnVoid(); 5333 } 5334 EXTERN_C_END 5335 5336
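/*
   Reuse pattern for MatGetBrowsOfAoCols() above (a sketch inferred from the code;
   the variable names are hypothetical). The first call returns the communication
   offsets (startsj) and the send buffer (bufa); passing them back on later calls
   lets the routine move only the numerical values:

     PetscInt  *startsj = PETSC_NULL;
     MatScalar *bufa    = PETSC_NULL;
     Mat        B_oth;

     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ... update the numerical values of B, keeping its nonzero pattern ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
*/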