#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr =
PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 91 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 92 jj = 0; 93 for (i=0; i<m; i++) { 94 for (j=0; j<dlens[i]; j++) { 95 if (gmataj[jj] < rstart) ld[i]++; 96 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 97 jj++; 98 } 99 } 100 /* receive numerical values */ 101 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 103 } 104 /* set preallocation */ 105 for (i=0; i<m; i++) { 106 dlens[i] -= olens[i]; 107 } 108 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 109 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 110 111 for (i=0; i<m; i++) { 112 dlens[i] += olens[i]; 113 } 114 cnt = 0; 115 for (i=0; i<m; i++) { 116 row = rstart + i; 117 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 118 cnt += dlens[i]; 119 } 120 if (rank) { 121 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 122 } 123 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 124 ierr = PetscFree(rowners);CHKERRQ(ierr); 125 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 126 *inmat = mat; 127 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 128 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 129 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 130 mat = *inmat; 131 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 132 if (!rank) { 133 /* send numerical values to other processes */ 134 gmata = (Mat_SeqAIJ*) gmat->data; 135 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 136 gmataa = gmata->a; 137 for (i=1; i<size; i++) { 138 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 139 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 140 } 141 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 142 } else { 143 /* receive numerical values from process 0*/ 144 nz = Ad->nz + Ao->nz; 145 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 146 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 147 } 148 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 149 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 150 ad = Ad->a; 151 ao = Ao->a; 152 if (mat->rmap->n) { 153 i = 0; 154 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 155 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 156 } 157 for (i=1; i<mat->rmap->n; i++) { 158 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 159 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 160 } 161 i--; 162 if (mat->rmap->n) { 163 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 164 } 165 if (rank) { 166 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 167 } 168 } 169 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 170 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 CHKMEMQ; 172 PetscFunctionReturn(0); 173 } 174 175 /* 176 Local utility routine that creates a mapping from the global column 177 number to the local number in the off-diagonal part of the 
local storage of the matrix. When PETSC_USE_CTABLE is used this is scalable, at
   a slightly higher hash table cost; without it, it is not scalable (each process
   holds an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
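  /*
     The row values v[] passed to MatSetValuesRow_MPIAIJ() are assumed to hold the
     complete nonzero row in order of increasing global column index: first the
     off-diagonal entries whose columns lie to the left of this process's diagonal
     block (stored in B), then the diagonal-block entries (stored in A), then the
     remaining off-diagonal entries to the right (stored in B).  For example, with
     ownership range [rstart,rend), a row whose nonzero global columns are
     {2, rstart, rstart+3, rend+5} must be passed as
     v = {a(row,2), a(row,rstart), a(row,rstart+3), a(row,rend+5)}.
  */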
Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 279 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 280 PetscErrorCode ierr; 281 PetscInt l,*garray = mat->garray,diag; 282 283 PetscFunctionBegin; 284 /* code only works for square matrices A */ 285 286 /* find size of row to the left of the diagonal part */ 287 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 288 row = row - diag; 289 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 290 if (garray[b->j[b->i[row]+l]] > diag) break; 291 } 292 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 293 294 /* diagonal part */ 295 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 296 297 /* right of diagonal part */ 298 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 #undef __FUNCT__ 303 #define __FUNCT__ "MatSetValues_MPIAIJ" 304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 305 { 306 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 307 PetscScalar value; 308 PetscErrorCode ierr; 309 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 310 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 311 PetscTruth roworiented = aij->roworiented; 312 313 /* Some Variables required in the macro */ 314 Mat A = aij->A; 315 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 316 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 317 MatScalar *aa = a->a; 318 PetscTruth ignorezeroentries = a->ignorezeroentries; 319 Mat B = aij->B; 320 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 321 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 322 MatScalar *ba = b->a; 323 324 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 325 PetscInt nonew = a->nonew; 326 MatScalar *ap1,*ap2; 327 328 PetscFunctionBegin; 329 for (i=0; i<m; i++) { 330 if (im[i] < 0) continue; 331 #if defined(PETSC_USE_DEBUG) 332 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 333 #endif 334 if (im[i] >= rstart && im[i] < rend) { 335 row = im[i] - rstart; 336 lastcol1 = -1; 337 rp1 = aj + ai[row]; 338 ap1 = aa + ai[row]; 339 rmax1 = aimax[row]; 340 nrow1 = ailen[row]; 341 low1 = 0; 342 high1 = nrow1; 343 lastcol2 = -1; 344 rp2 = bj + bi[row]; 345 ap2 = ba + bi[row]; 346 rmax2 = bimax[row]; 347 nrow2 = bilen[row]; 348 low2 = 0; 349 high2 = nrow2; 350 351 for (j=0; j<n; j++) { 352 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 353 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 354 if (in[j] >= cstart && in[j] < cend){ 355 col = in[j] - cstart; 356 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 357 } else if (in[j] < 0) continue; 358 #if defined(PETSC_USE_DEBUG) 359 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 360 #endif 361 else { 362 if (mat->was_assembled) { 363 if (!aij->colmap) { 364 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 365 } 366 #if defined (PETSC_USE_CTABLE) 367 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 368 col--; 369 #else 370 col = aij->colmap[in[j]] - 1; 371 #endif 372 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 373 ierr = 
DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 374 col = in[j]; 375 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 376 B = aij->B; 377 b = (Mat_SeqAIJ*)B->data; 378 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 379 rp2 = bj + bi[row]; 380 ap2 = ba + bi[row]; 381 rmax2 = bimax[row]; 382 nrow2 = bilen[row]; 383 low2 = 0; 384 high2 = nrow2; 385 bm = aij->B->rmap->n; 386 ba = b->a; 387 } 388 } else col = in[j]; 389 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 390 } 391 } 392 } else { 393 if (!aij->donotstash) { 394 if (roworiented) { 395 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 396 } else { 397 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 398 } 399 } 400 } 401 } 402 PetscFunctionReturn(0); 403 } 404 405 #undef __FUNCT__ 406 #define __FUNCT__ "MatGetValues_MPIAIJ" 407 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 408 { 409 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 410 PetscErrorCode ierr; 411 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 412 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 413 414 PetscFunctionBegin; 415 for (i=0; i<m; i++) { 416 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 417 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 418 if (idxm[i] >= rstart && idxm[i] < rend) { 419 row = idxm[i] - rstart; 420 for (j=0; j<n; j++) { 421 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 422 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 423 if (idxn[j] >= cstart && idxn[j] < cend){ 424 col = idxn[j] - cstart; 425 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 426 } else { 427 if (!aij->colmap) { 428 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 429 } 430 #if defined (PETSC_USE_CTABLE) 431 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 432 col --; 433 #else 434 col = aij->colmap[idxn[j]] - 1; 435 #endif 436 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 437 else { 438 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 439 } 440 } 441 } 442 } else { 443 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 444 } 445 } 446 PetscFunctionReturn(0); 447 } 448 449 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 453 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 454 { 455 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 456 PetscErrorCode ierr; 457 PetscInt nstash,reallocs; 458 InsertMode addv; 459 460 PetscFunctionBegin; 461 if (aij->donotstash) { 462 PetscFunctionReturn(0); 463 } 464 465 /* make sure all processors are either in INSERTMODE or ADDMODE */ 466 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 467 if (addv == (ADD_VALUES|INSERT_VALUES)) { 468 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 469 } 470 mat->insertmode = addv; /* in case this processor had no cache */ 471 472 ierr = 
MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves, so that we may reassemble. */
  /*
     if the nonzero structure of the submatrix B cannot change then we know that
     no processor disassembled, so we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar
diag) 563 { 564 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 565 PetscErrorCode ierr; 566 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 567 PetscInt i,*owners = A->rmap->range; 568 PetscInt *nprocs,j,idx,nsends,row; 569 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 570 PetscInt *rvalues,count,base,slen,*source; 571 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 572 MPI_Comm comm = ((PetscObject)A)->comm; 573 MPI_Request *send_waits,*recv_waits; 574 MPI_Status recv_status,*send_status; 575 #if defined(PETSC_DEBUG) 576 PetscTruth found = PETSC_FALSE; 577 #endif 578 579 PetscFunctionBegin; 580 /* first count number of contributors to each processor */ 581 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 582 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 583 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 584 j = 0; 585 for (i=0; i<N; i++) { 586 if (lastidx > (idx = rows[i])) j = 0; 587 lastidx = idx; 588 for (; j<size; j++) { 589 if (idx >= owners[j] && idx < owners[j+1]) { 590 nprocs[2*j]++; 591 nprocs[2*j+1] = 1; 592 owner[i] = j; 593 #if defined(PETSC_DEBUG) 594 found = PETSC_TRUE; 595 #endif 596 break; 597 } 598 } 599 #if defined(PETSC_DEBUG) 600 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 601 found = PETSC_FALSE; 602 #endif 603 } 604 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 605 606 /* inform other processors of number of messages and max length*/ 607 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 608 609 /* post receives: */ 610 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 611 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 612 for (i=0; i<nrecvs; i++) { 613 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 614 } 615 616 /* do sends: 617 1) starts[i] gives the starting index in svalues for stuff going to 618 the ith processor 619 */ 620 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 621 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 622 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 623 starts[0] = 0; 624 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 625 for (i=0; i<N; i++) { 626 svalues[starts[owner[i]]++] = rows[i]; 627 } 628 629 starts[0] = 0; 630 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 631 count = 0; 632 for (i=0; i<size; i++) { 633 if (nprocs[2*i+1]) { 634 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 635 } 636 } 637 ierr = PetscFree(starts);CHKERRQ(ierr); 638 639 base = owners[rank]; 640 641 /* wait on receives */ 642 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 643 source = lens + nrecvs; 644 count = nrecvs; slen = 0; 645 while (count) { 646 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 647 /* unpack receives into our local space */ 648 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 649 source[imdex] = recv_status.MPI_SOURCE; 650 lens[imdex] = n; 651 slen += n; 652 count--; 653 } 654 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 655 656 /* move the data into the send scatter */ 657 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 658 count = 0; 659 for (i=0; i<nrecvs; i++) { 660 values = rvalues + i*nmax; 661 for (j=0; j<lens[i]; j++) { 
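      /* convert the global row numbers just received into local row indices;
         base is the first row owned by this process */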
662 lrows[count++] = values[j] - base; 663 } 664 } 665 ierr = PetscFree(rvalues);CHKERRQ(ierr); 666 ierr = PetscFree(lens);CHKERRQ(ierr); 667 ierr = PetscFree(owner);CHKERRQ(ierr); 668 ierr = PetscFree(nprocs);CHKERRQ(ierr); 669 670 /* actually zap the local rows */ 671 /* 672 Zero the required rows. If the "diagonal block" of the matrix 673 is square and the user wishes to set the diagonal we use separate 674 code so that MatSetValues() is not called for each diagonal allocating 675 new memory, thus calling lots of mallocs and slowing things down. 676 677 */ 678 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 679 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 680 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 681 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 682 } else if (diag != 0.0) { 683 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 684 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 685 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 687 } 688 for (i = 0; i < slen; i++) { 689 row = lrows[i] + rstart; 690 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 } else { 695 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 696 } 697 ierr = PetscFree(lrows);CHKERRQ(ierr); 698 699 /* wait on sends */ 700 if (nsends) { 701 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 702 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 703 ierr = PetscFree(send_status);CHKERRQ(ierr); 704 } 705 ierr = PetscFree(send_waits);CHKERRQ(ierr); 706 ierr = PetscFree(svalues);CHKERRQ(ierr); 707 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMult_MPIAIJ" 713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscInt nt; 718 719 PetscFunctionBegin; 720 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 721 if (nt != A->cmap->n) { 722 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 723 } 724 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 726 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 733 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 734 { 735 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatMultAdd_MPIAIJ" 745 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 746 { 747 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 748 PetscErrorCode ierr; 749 750 PetscFunctionBegin; 751 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 752 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 753 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 754 ierr = 
(*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 760 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 761 { 762 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 763 PetscErrorCode ierr; 764 PetscTruth merged; 765 766 PetscFunctionBegin; 767 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 768 /* do nondiagonal part */ 769 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 770 if (!merged) { 771 /* send it on its way */ 772 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 /* do local part */ 774 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 775 /* receive remote parts: note this assumes the values are not actually */ 776 /* added in yy until the next line, */ 777 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 778 } else { 779 /* do local part */ 780 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 781 /* send it on its way */ 782 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 783 /* values actually were received in the Begin() but we need to call this nop */ 784 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 785 } 786 PetscFunctionReturn(0); 787 } 788 789 EXTERN_C_BEGIN 790 #undef __FUNCT__ 791 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 792 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 793 { 794 MPI_Comm comm; 795 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 796 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 797 IS Me,Notme; 798 PetscErrorCode ierr; 799 PetscInt M,N,first,last,*notme,i; 800 PetscMPIInt size; 801 802 PetscFunctionBegin; 803 804 /* Easy test: symmetric diagonal block */ 805 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 806 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 807 if (!*f) PetscFunctionReturn(0); 808 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 809 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 810 if (size == 1) PetscFunctionReturn(0); 811 812 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
*/
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

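/*
   MatView_MPIAIJ_Binary() below writes the PETSc binary matrix format: a four-entry
   header (MAT_FILE_COOKIE, global rows M, global columns N, global number of nonzeros),
   then the length of every row (process by process), then the global column indices of
   every row in increasing order, then the numerical values in the same order.
   Process 0 receives the data from all other processes and does all of the writing.
*/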
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ*    A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ*    B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process needs */
  ierr =
MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 983 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 984 cnt = 0; 985 for (i=0; i<mat->rmap->n; i++) { 986 for (j=B->i[i]; j<B->i[i+1]; j++) { 987 if ( (col = garray[B->j[j]]) > cstart) break; 988 column_indices[cnt++] = col; 989 } 990 for (k=A->i[i]; k<A->i[i+1]; k++) { 991 column_indices[cnt++] = A->j[k] + cstart; 992 } 993 for (; j<B->i[i+1]; j++) { 994 column_indices[cnt++] = garray[B->j[j]]; 995 } 996 } 997 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 998 999 /* store the column indices to the file */ 1000 if (!rank) { 1001 MPI_Status status; 1002 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1003 for (i=1; i<size; i++) { 1004 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1005 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1006 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1007 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1008 } 1009 } else { 1010 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1011 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1012 } 1013 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1014 1015 /* load up the local column values */ 1016 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1017 cnt = 0; 1018 for (i=0; i<mat->rmap->n; i++) { 1019 for (j=B->i[i]; j<B->i[i+1]; j++) { 1020 if ( garray[B->j[j]] > cstart) break; 1021 column_values[cnt++] = B->a[j]; 1022 } 1023 for (k=A->i[i]; k<A->i[i+1]; k++) { 1024 column_values[cnt++] = A->a[k]; 1025 } 1026 for (; j<B->i[i+1]; j++) { 1027 column_values[cnt++] = B->a[j]; 1028 } 1029 } 1030 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1031 1032 /* store the column values to the file */ 1033 if (!rank) { 1034 MPI_Status status; 1035 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1036 for (i=1; i<size; i++) { 1037 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1038 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1039 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1040 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1041 } 1042 } else { 1043 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1044 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1045 } 1046 ierr = PetscFree(column_values);CHKERRQ(ierr); 1047 PetscFunctionReturn(0); 1048 } 1049 1050 #undef __FUNCT__ 1051 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1052 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1053 { 1054 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1055 PetscErrorCode ierr; 1056 PetscMPIInt rank = aij->rank,size = aij->size; 1057 PetscTruth isdraw,iascii,isbinary; 1058 PetscViewer sviewer; 1059 PetscViewerFormat format; 1060 1061 PetscFunctionBegin; 1062 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1063 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1064 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1065 if (iascii) { 1066 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1067 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1068 MatInfo info; 1069 PetscTruth inodes; 1070 1071 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1072 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1073 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1074 if (!inodes) { 1075 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1076 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1077 } else { 1078 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1079 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1080 } 1081 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1082 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1083 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1084 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1085 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1086 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1087 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1090 PetscInt inodecount,inodelimit,*inodes; 1091 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1092 if (inodes) { 1093 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1094 } else { 1095 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1096 } 1097 PetscFunctionReturn(0); 1098 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1099 PetscFunctionReturn(0); 1100 } 1101 } else if (isbinary) { 1102 if (size == 1) { 1103 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1104 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1105 } else { 1106 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1107 } 1108 PetscFunctionReturn(0); 1109 } else if (isdraw) { 1110 PetscDraw draw; 1111 PetscTruth isnull; 1112 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1113 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1114 } 1115 1116 if (size == 1) { 1117 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1118 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1119 } else { 1120 /* assemble the entire matrix onto first processor. 
*/ 1121 Mat A; 1122 Mat_SeqAIJ *Aloc; 1123 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1124 MatScalar *a; 1125 1126 if (mat->rmap->N > 1024) { 1127 PetscTruth flg = PETSC_FALSE; 1128 1129 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1130 if (!flg) { 1131 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1132 } 1133 } 1134 1135 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1136 if (!rank) { 1137 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1138 } else { 1139 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1140 } 1141 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1142 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1143 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1144 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1145 1146 /* copy over the A part */ 1147 Aloc = (Mat_SeqAIJ*)aij->A->data; 1148 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1149 row = mat->rmap->rstart; 1150 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1151 for (i=0; i<m; i++) { 1152 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1153 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1154 } 1155 aj = Aloc->j; 1156 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1157 1158 /* copy over the B part */ 1159 Aloc = (Mat_SeqAIJ*)aij->B->data; 1160 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1161 row = mat->rmap->rstart; 1162 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1163 ct = cols; 1164 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1165 for (i=0; i<m; i++) { 1166 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1167 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1168 } 1169 ierr = PetscFree(ct);CHKERRQ(ierr); 1170 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1171 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1172 /* 1173 Everyone has to call to draw the matrix since the graphics waits are 1174 synchronized across all processors that share the PetscDraw object 1175 */ 1176 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1177 if (!rank) { 1178 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1179 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1180 } 1181 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1182 ierr = MatDestroy(A);CHKERRQ(ierr); 1183 } 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatView_MPIAIJ" 1189 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1190 { 1191 PetscErrorCode ierr; 1192 PetscTruth iascii,isdraw,issocket,isbinary; 1193 1194 PetscFunctionBegin; 1195 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1196 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1197 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1198 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1199 if (iascii || isdraw || isbinary || issocket) { 1200 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1201 } else 
{ 1202 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1203 } 1204 PetscFunctionReturn(0); 1205 } 1206 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "MatSOR_MPIAIJ" 1209 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1210 { 1211 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1212 PetscErrorCode ierr; 1213 Vec bb1 = 0; 1214 PetscTruth hasop; 1215 1216 PetscFunctionBegin; 1217 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1218 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1219 } 1220 1221 if (flag == SOR_APPLY_UPPER) { 1222 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1223 PetscFunctionReturn(0); 1224 } 1225 1226 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1227 if (flag & SOR_ZERO_INITIAL_GUESS) { 1228 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1229 its--; 1230 } 1231 1232 while (its--) { 1233 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1234 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1235 1236 /* update rhs: bb1 = bb - B*x */ 1237 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1238 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1239 1240 /* local sweep */ 1241 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1242 } 1243 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1244 if (flag & SOR_ZERO_INITIAL_GUESS) { 1245 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1246 its--; 1247 } 1248 while (its--) { 1249 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1250 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1251 1252 /* update rhs: bb1 = bb - B*x */ 1253 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1254 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1255 1256 /* local sweep */ 1257 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1258 } 1259 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1260 if (flag & SOR_ZERO_INITIAL_GUESS) { 1261 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1262 its--; 1263 } 1264 while (its--) { 1265 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1266 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1267 1268 /* update rhs: bb1 = bb - B*x */ 1269 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1270 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1271 1272 /* local sweep */ 1273 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1274 } 1275 } else if (flag & SOR_EISENSTAT) { 1276 Vec xx1; 1277 1278 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1279 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1280 1281 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1282 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1283 if (!mat->diag) { 1284 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1285 ierr = 
MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1286 } 1287 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1288 if (hasop) { 1289 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1290 } else { 1291 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1292 } 1293 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1294 1295 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1296 1297 /* local sweep */ 1298 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1299 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1300 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1301 } else { 1302 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1303 } 1304 1305 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1306 PetscFunctionReturn(0); 1307 } 1308 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "MatPermute_MPIAIJ" 1311 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1312 { 1313 MPI_Comm comm,pcomm; 1314 PetscInt first,local_size,nrows; 1315 const PetscInt *rows; 1316 PetscMPIInt size; 1317 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1318 PetscErrorCode ierr; 1319 1320 PetscFunctionBegin; 1321 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1322 /* make a collective version of 'rowp' */ 1323 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1324 if (pcomm==comm) { 1325 crowp = rowp; 1326 } else { 1327 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1328 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1329 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1330 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1331 } 1332 /* collect the global row permutation and invert it */ 1333 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1334 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1335 if (pcomm!=comm) { 1336 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1337 } 1338 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1339 /* get the local target indices */ 1340 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1341 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1342 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1343 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1344 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1345 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1346 /* the column permutation is so much easier; 1347 make a local version of 'colp' and invert it */ 1348 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1349 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1350 if (size==1) { 1351 lcolp = colp; 1352 } else { 1353 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1354 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1355 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1356 } 1357 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1358 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1359 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1360 if (size>1) { 1361 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1362 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1363 } 1364 /* now we just get the submatrix */ 1365 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1366 /* clean up */ 1367 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1368 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 #undef __FUNCT__ 1373 #define __FUNCT__ 
"MatGetInfo_MPIAIJ" 1374 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1375 { 1376 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1377 Mat A = mat->A,B = mat->B; 1378 PetscErrorCode ierr; 1379 PetscReal isend[5],irecv[5]; 1380 1381 PetscFunctionBegin; 1382 info->block_size = 1.0; 1383 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1384 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1385 isend[3] = info->memory; isend[4] = info->mallocs; 1386 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1387 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1388 isend[3] += info->memory; isend[4] += info->mallocs; 1389 if (flag == MAT_LOCAL) { 1390 info->nz_used = isend[0]; 1391 info->nz_allocated = isend[1]; 1392 info->nz_unneeded = isend[2]; 1393 info->memory = isend[3]; 1394 info->mallocs = isend[4]; 1395 } else if (flag == MAT_GLOBAL_MAX) { 1396 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1397 info->nz_used = irecv[0]; 1398 info->nz_allocated = irecv[1]; 1399 info->nz_unneeded = irecv[2]; 1400 info->memory = irecv[3]; 1401 info->mallocs = irecv[4]; 1402 } else if (flag == MAT_GLOBAL_SUM) { 1403 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1404 info->nz_used = irecv[0]; 1405 info->nz_allocated = irecv[1]; 1406 info->nz_unneeded = irecv[2]; 1407 info->memory = irecv[3]; 1408 info->mallocs = irecv[4]; 1409 } 1410 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1411 info->fill_ratio_needed = 0; 1412 info->factor_mallocs = 0; 1413 1414 PetscFunctionReturn(0); 1415 } 1416 1417 #undef __FUNCT__ 1418 #define __FUNCT__ "MatSetOption_MPIAIJ" 1419 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1420 { 1421 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1422 PetscErrorCode ierr; 1423 1424 PetscFunctionBegin; 1425 switch (op) { 1426 case MAT_NEW_NONZERO_LOCATIONS: 1427 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1428 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1429 case MAT_KEEP_NONZERO_PATTERN: 1430 case MAT_NEW_NONZERO_LOCATION_ERR: 1431 case MAT_USE_INODES: 1432 case MAT_IGNORE_ZERO_ENTRIES: 1433 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1434 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1435 break; 1436 case MAT_ROW_ORIENTED: 1437 a->roworiented = flg; 1438 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1439 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1440 break; 1441 case MAT_NEW_DIAGONALS: 1442 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1443 break; 1444 case MAT_IGNORE_OFF_PROC_ENTRIES: 1445 a->donotstash = PETSC_TRUE; 1446 break; 1447 case MAT_SYMMETRIC: 1448 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1449 break; 1450 case MAT_STRUCTURALLY_SYMMETRIC: 1451 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1452 break; 1453 case MAT_HERMITIAN: 1454 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1455 break; 1456 case MAT_SYMMETRY_ETERNAL: 1457 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1458 break; 1459 default: 1460 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1461 } 1462 PetscFunctionReturn(0); 1463 } 1464 1465 #undef __FUNCT__ 1466 #define __FUNCT__ "MatGetRow_MPIAIJ" 1467 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1468 { 1469 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1470 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1471 PetscErrorCode ierr; 1472 PetscInt 
i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1473 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1474 PetscInt *cmap,*idx_p; 1475 1476 PetscFunctionBegin; 1477 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1478 mat->getrowactive = PETSC_TRUE; 1479 1480 if (!mat->rowvalues && (idx || v)) { 1481 /* 1482 allocate enough space to hold information from the longest row. 1483 */ 1484 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1485 PetscInt max = 1,tmp; 1486 for (i=0; i<matin->rmap->n; i++) { 1487 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1488 if (max < tmp) { max = tmp; } 1489 } 1490 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1491 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1492 } 1493 1494 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1495 lrow = row - rstart; 1496 1497 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1498 if (!v) {pvA = 0; pvB = 0;} 1499 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1500 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1501 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1502 nztot = nzA + nzB; 1503 1504 cmap = mat->garray; 1505 if (v || idx) { 1506 if (nztot) { 1507 /* Sort by increasing column numbers, assuming A and B already sorted */ 1508 PetscInt imark = -1; 1509 if (v) { 1510 *v = v_p = mat->rowvalues; 1511 for (i=0; i<nzB; i++) { 1512 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1513 else break; 1514 } 1515 imark = i; 1516 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1517 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1518 } 1519 if (idx) { 1520 *idx = idx_p = mat->rowindices; 1521 if (imark > -1) { 1522 for (i=0; i<imark; i++) { 1523 idx_p[i] = cmap[cworkB[i]]; 1524 } 1525 } else { 1526 for (i=0; i<nzB; i++) { 1527 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1528 else break; 1529 } 1530 imark = i; 1531 } 1532 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1533 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1534 } 1535 } else { 1536 if (idx) *idx = 0; 1537 if (v) *v = 0; 1538 } 1539 } 1540 *nz = nztot; 1541 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1542 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1543 PetscFunctionReturn(0); 1544 } 1545 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1548 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1549 { 1550 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1551 1552 PetscFunctionBegin; 1553 if (!aij->getrowactive) { 1554 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1555 } 1556 aij->getrowactive = PETSC_FALSE; 1557 PetscFunctionReturn(0); 1558 } 1559 1560 #undef __FUNCT__ 1561 #define __FUNCT__ "MatNorm_MPIAIJ" 1562 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1563 { 1564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1565 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1566 PetscErrorCode ierr; 1567 PetscInt i,j,cstart = mat->cmap->rstart; 1568 PetscReal sum = 0.0; 1569 MatScalar *v; 1570 1571 PetscFunctionBegin; 1572 if (aij->size == 1) { 1573 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1574 } else { 1575 if (type == NORM_FROBENIUS) { 1576 v = amat->a; 1577 for (i=0; i<amat->nz; i++) { 1578 
#if defined(PETSC_USE_COMPLEX) 1579 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1580 #else 1581 sum += (*v)*(*v); v++; 1582 #endif 1583 } 1584 v = bmat->a; 1585 for (i=0; i<bmat->nz; i++) { 1586 #if defined(PETSC_USE_COMPLEX) 1587 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1588 #else 1589 sum += (*v)*(*v); v++; 1590 #endif 1591 } 1592 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1593 *norm = sqrt(*norm); 1594 } else if (type == NORM_1) { /* max column norm */ 1595 PetscReal *tmp,*tmp2; 1596 PetscInt *jj,*garray = aij->garray; 1597 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1598 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1599 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1600 *norm = 0.0; 1601 v = amat->a; jj = amat->j; 1602 for (j=0; j<amat->nz; j++) { 1603 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1604 } 1605 v = bmat->a; jj = bmat->j; 1606 for (j=0; j<bmat->nz; j++) { 1607 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1608 } 1609 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1610 for (j=0; j<mat->cmap->N; j++) { 1611 if (tmp2[j] > *norm) *norm = tmp2[j]; 1612 } 1613 ierr = PetscFree(tmp);CHKERRQ(ierr); 1614 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1615 } else if (type == NORM_INFINITY) { /* max row norm */ 1616 PetscReal ntemp = 0.0; 1617 for (j=0; j<aij->A->rmap->n; j++) { 1618 v = amat->a + amat->i[j]; 1619 sum = 0.0; 1620 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1621 sum += PetscAbsScalar(*v); v++; 1622 } 1623 v = bmat->a + bmat->i[j]; 1624 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1625 sum += PetscAbsScalar(*v); v++; 1626 } 1627 if (sum > ntemp) ntemp = sum; 1628 } 1629 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1630 } else { 1631 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1632 } 1633 } 1634 PetscFunctionReturn(0); 1635 } 1636 1637 #undef __FUNCT__ 1638 #define __FUNCT__ "MatTranspose_MPIAIJ" 1639 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1640 { 1641 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1642 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1643 PetscErrorCode ierr; 1644 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1645 PetscInt cstart=A->cmap->rstart,ncol; 1646 Mat B; 1647 MatScalar *array; 1648 1649 PetscFunctionBegin; 1650 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1651 1652 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1653 ai = Aloc->i; aj = Aloc->j; 1654 bi = Bloc->i; bj = Bloc->j; 1655 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1656 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1657 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1658 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1659 for (i=0; i<ai[ma]; i++){ 1660 d_nnz[aj[i]] ++; 1661 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1662 } 1663 1664 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1665 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1666 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1667 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1668 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1669 } else { 
1670 B = *matout; 1671 } 1672 1673 /* copy over the A part */ 1674 array = Aloc->a; 1675 row = A->rmap->rstart; 1676 for (i=0; i<ma; i++) { 1677 ncol = ai[i+1]-ai[i]; 1678 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1679 row++; array += ncol; aj += ncol; 1680 } 1681 aj = Aloc->j; 1682 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1683 1684 /* copy over the B part */ 1685 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1686 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1687 array = Bloc->a; 1688 row = A->rmap->rstart; 1689 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1690 cols_tmp = cols; 1691 for (i=0; i<mb; i++) { 1692 ncol = bi[i+1]-bi[i]; 1693 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1694 row++; array += ncol; cols_tmp += ncol; 1695 } 1696 ierr = PetscFree(cols);CHKERRQ(ierr); 1697 1698 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1699 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1701 *matout = B; 1702 } else { 1703 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1704 } 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1710 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1711 { 1712 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1713 Mat a = aij->A,b = aij->B; 1714 PetscErrorCode ierr; 1715 PetscInt s1,s2,s3; 1716 1717 PetscFunctionBegin; 1718 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1719 if (rr) { 1720 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1721 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1722 /* Overlap communication with computation. 
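      The scatter of rr into aij->lvec is only started here; the diagonal block a is
      scaled while the messages are in flight, and the VecScatterEnd() below completes
      the scatter before the off-diagonal block b is right-scaled with the ghosted values.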
*/ 1723 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1724 } 1725 if (ll) { 1726 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1727 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1728 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1729 } 1730 /* scale the diagonal block */ 1731 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1732 1733 if (rr) { 1734 /* Do a scatter end and then right scale the off-diagonal block */ 1735 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1736 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1737 } 1738 1739 PetscFunctionReturn(0); 1740 } 1741 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1744 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1745 { 1746 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1747 PetscErrorCode ierr; 1748 1749 PetscFunctionBegin; 1750 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1751 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1752 PetscFunctionReturn(0); 1753 } 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1756 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1757 { 1758 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1759 PetscErrorCode ierr; 1760 1761 PetscFunctionBegin; 1762 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1763 PetscFunctionReturn(0); 1764 } 1765 1766 #undef __FUNCT__ 1767 #define __FUNCT__ "MatEqual_MPIAIJ" 1768 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1769 { 1770 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1771 Mat a,b,c,d; 1772 PetscTruth flg; 1773 PetscErrorCode ierr; 1774 1775 PetscFunctionBegin; 1776 a = matA->A; b = matA->B; 1777 c = matB->A; d = matB->B; 1778 1779 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1780 if (flg) { 1781 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1782 } 1783 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1784 PetscFunctionReturn(0); 1785 } 1786 1787 #undef __FUNCT__ 1788 #define __FUNCT__ "MatCopy_MPIAIJ" 1789 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1790 { 1791 PetscErrorCode ierr; 1792 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1793 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1794 1795 PetscFunctionBegin; 1796 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1797 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1798 /* because of the column compression in the off-processor part of the matrix a->B, 1799 the number of columns in a->B and b->B may be different, hence we cannot call 1800 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1801 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1802 then copying the submatrices */ 1803 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1804 } else { 1805 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1806 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1807 } 1808 PetscFunctionReturn(0); 1809 } 1810 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1813 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1814 { 1815 PetscErrorCode ierr; 1816 1817 PetscFunctionBegin; 1818 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1819 PetscFunctionReturn(0); 1820 } 1821 1822 #include "petscblaslapack.h" 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatAXPY_MPIAIJ" 1825 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1826 { 1827 PetscErrorCode ierr; 1828 PetscInt i; 1829 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1830 PetscBLASInt bnz,one=1; 1831 Mat_SeqAIJ *x,*y; 1832 1833 PetscFunctionBegin; 1834 if (str == SAME_NONZERO_PATTERN) { 1835 PetscScalar alpha = a; 1836 x = (Mat_SeqAIJ *)xx->A->data; 1837 y = (Mat_SeqAIJ *)yy->A->data; 1838 bnz = PetscBLASIntCast(x->nz); 1839 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1840 x = (Mat_SeqAIJ *)xx->B->data; 1841 y = (Mat_SeqAIJ *)yy->B->data; 1842 bnz = PetscBLASIntCast(x->nz); 1843 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1844 } else if (str == SUBSET_NONZERO_PATTERN) { 1845 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1846 1847 x = (Mat_SeqAIJ *)xx->B->data; 1848 y = (Mat_SeqAIJ *)yy->B->data; 1849 if (y->xtoy && y->XtoY != xx->B) { 1850 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1851 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1852 } 1853 if (!y->xtoy) { /* get xtoy */ 1854 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1855 y->XtoY = xx->B; 1856 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1857 } 1858 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1859 } else { 1860 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1861 } 1862 PetscFunctionReturn(0); 1863 } 1864 1865 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1866 1867 #undef __FUNCT__ 1868 #define __FUNCT__ "MatConjugate_MPIAIJ" 1869 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1870 { 1871 #if defined(PETSC_USE_COMPLEX) 1872 PetscErrorCode ierr; 1873 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1874 1875 PetscFunctionBegin; 1876 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1877 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1878 #else 1879 PetscFunctionBegin; 1880 #endif 1881 PetscFunctionReturn(0); 1882 } 1883 1884 #undef __FUNCT__ 1885 #define __FUNCT__ "MatRealPart_MPIAIJ" 1886 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1887 { 1888 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1889 PetscErrorCode ierr; 1890 1891 PetscFunctionBegin; 1892 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1893 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1894 PetscFunctionReturn(0); 1895 } 1896 1897 #undef __FUNCT__ 1898 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1899 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1900 { 1901 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1902 PetscErrorCode ierr; 1903 1904 PetscFunctionBegin; 1905 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1906 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 #ifdef PETSC_HAVE_PBGL 1911 1912 #include 
<boost/parallel/mpi/bsp_process_group.hpp>
1913 #include <boost/graph/distributed/ilu_default_graph.hpp>
1914 #include <boost/graph/distributed/ilu_0_block.hpp>
1915 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1916 #include <boost/graph/distributed/petsc/interface.hpp>
1917 #include <boost/multi_array.hpp>
1918 #include <boost/parallel/distributed_property_map.hpp>
1919
1920 #undef __FUNCT__
1921 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1922 /*
1923 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1924 */
1925 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1926 {
1927 namespace petsc = boost::distributed::petsc;
1928
1929 namespace graph_dist = boost::graph::distributed;
1930 using boost::graph::distributed::ilu_default::process_group_type;
1931 using boost::graph::ilu_permuted;
1932
1933 PetscTruth row_identity, col_identity;
1934 PetscContainer c;
1935 PetscInt m, n, M, N;
1936 PetscErrorCode ierr;
1937
1938 PetscFunctionBegin;
1939 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1940 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1941 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1942 if (!row_identity || !col_identity) {
1943 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1944 }
1945
1946 process_group_type pg;
1947 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
1948 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1949 lgraph_type& level_graph = *lgraph_p;
1950 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
1951
1952 petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1953 ilu_permuted(level_graph);
1954
1955 /* put together the new matrix */
1956 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1957 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1958 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1959 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1960 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1961 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1962 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1963
1964 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1965 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1966 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1967 PetscFunctionReturn(0);
1968 }
1969
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1972 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1973 {
1974 PetscFunctionBegin;
1975 PetscFunctionReturn(0);
1976 }
1977
1978 #undef __FUNCT__
1979 #define __FUNCT__ "MatSolve_MPIAIJ"
1980 /*
1981 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1982 */
1983 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1984 {
1985 namespace graph_dist = boost::graph::distributed;
1986
1987 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
1988 lgraph_type* lgraph_p;
1989 PetscContainer c;
1990 PetscErrorCode ierr;
1991
1992 PetscFunctionBegin;
1993 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1994 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1995 ierr = VecCopy(b, x);CHKERRQ(ierr);
1996
1997 PetscScalar* array_x;
1998 ierr = VecGetArray(x,
&array_x);CHKERRQ(ierr);
1999 PetscInt sx;
2000 ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2001
2002 PetscScalar* array_b;
2003 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2004 PetscInt sb;
2005 ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2006
2007 lgraph_type& level_graph = *lgraph_p;
2008 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2009
2010 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2011 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
2012 ref_x(array_x, boost::extents[num_vertices(graph)]);
2013
2014 typedef boost::iterator_property_map<array_ref_type::iterator,
2015 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2016 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2017 vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2018
2019 ilu_set_solve(*lgraph_p, vector_b, vector_x);
2020 ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); /* give the vector arrays back before returning */
2021 PetscFunctionReturn(0);
2022 }
2023 #endif
2024
2025 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2026 PetscInt nzlocal,nsends,nrecvs;
2027 PetscMPIInt *send_rank;
2028 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j;
2029 PetscScalar *sbuf_a,**rbuf_a;
2030 PetscErrorCode (*MatDestroy)(Mat);
2031 } Mat_Redundant;
2032
2033 #undef __FUNCT__
2034 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2035 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2036 {
2037 PetscErrorCode ierr;
2038 Mat_Redundant *redund=(Mat_Redundant*)ptr;
2039 PetscInt i;
2040
2041 PetscFunctionBegin;
2042 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
2043 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2044 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2045 for (i=0; i<redund->nrecvs; i++){
2046 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2047 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2048 }
2049 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2050 ierr = PetscFree(redund);CHKERRQ(ierr);
2051 PetscFunctionReturn(0);
2052 }
2053
2054 #undef __FUNCT__
2055 #define __FUNCT__ "MatDestroy_MatRedundant"
2056 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2057 {
2058 PetscErrorCode ierr;
2059 PetscContainer container;
2060 Mat_Redundant *redund=PETSC_NULL;
2061
2062 PetscFunctionBegin;
2063 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2064 if (container) {
2065 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2066 } else {
2067 SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2068 }
2069 A->ops->destroy = redund->MatDestroy;
2070 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2071 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2072 ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2073 PetscFunctionReturn(0);
2074 }
2075
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2078 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2079 {
2080 PetscMPIInt rank,size;
2081 MPI_Comm comm=((PetscObject)mat)->comm;
2082 PetscErrorCode ierr;
2083 PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
2084 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2085 PetscInt *rowrange=mat->rmap->range;
2086 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2087 Mat A=aij->A,B=aij->B,C=*matredundant;
2088 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2089 PetscScalar *sbuf_a;
2090 PetscInt nzlocal=a->nz+b->nz;
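 /*
    Outline of what follows: (1) determine send_rank/recv_rank, i.e. which processes in the
    other sub-communicators own the same block of rows; (2) pack the local rows of the
    diagonal block A and off-diagonal block B (with global column indices) into sbuf_j and
    sbuf_a; (3) exchange the packed rows with nonblocking sends/receives; (4) insert the
    local and received rows into C so that each sub-communicator ends up with a complete
    copy of mat.
 */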
2091 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2092 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2093 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2094 MatScalar *aworkA,*aworkB;
2095 PetscScalar *vals;
2096 PetscMPIInt tag1,tag2,tag3,imdex;
2097 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2098 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2099 MPI_Status recv_status,*send_status;
2100 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2101 PetscInt **rbuf_j=PETSC_NULL;
2102 PetscScalar **rbuf_a=PETSC_NULL;
2103 Mat_Redundant *redund=PETSC_NULL;
2104 PetscContainer container;
2105
2106 PetscFunctionBegin;
2107 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2108 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2109
2110 if (reuse == MAT_REUSE_MATRIX) {
2111 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2112 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2113 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2114 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2115 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2116 if (container) {
2117 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2118 } else {
2119 SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2120 }
2121 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2122
2123 nsends = redund->nsends;
2124 nrecvs = redund->nrecvs;
2125 send_rank = redund->send_rank; recv_rank = send_rank + size;
2126 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends;
2127 sbuf_j = redund->sbuf_j;
2128 sbuf_a = redund->sbuf_a;
2129 rbuf_j = redund->rbuf_j;
2130 rbuf_a = redund->rbuf_a;
2131 }
2132
2133 if (reuse == MAT_INITIAL_MATRIX){
2134 PetscMPIInt subrank,subsize;
2135 PetscInt nleftover,np_subcomm;
2136 /* get the destination processors' id send_rank, nsends and nrecvs */
2137 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2138 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2139 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2140 recv_rank = send_rank + size;
2141 np_subcomm = size/nsubcomm;
2142 nleftover = size - nsubcomm*np_subcomm;
2143 nsends = 0; nrecvs = 0;
2144 for (i=0; i<size; i++){ /* i=rank*/
2145 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2146 send_rank[nsends] = i; nsends++;
2147 recv_rank[nrecvs++] = i;
2148 }
2149 }
2150 if (rank >= size - nleftover){/* this proc is a leftover processor */
2151 i = size-nleftover-1;
2152 j = 0;
2153 while (j < nsubcomm - nleftover){
2154 send_rank[nsends++] = i;
2155 i--; j++;
2156 }
2157 }
2158
2159 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2160 for (i=0; i<nleftover; i++){
2161 recv_rank[nrecvs++] = size-nleftover+i;
2162 }
2163 }
2164
2165 /* allocate sbuf_j, sbuf_a */
2166 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2167 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2168 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2169 } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2170
2171 /* copy mat's local entries into the buffers */
2172 if (reuse == MAT_INITIAL_MATRIX){
2173 rownz_max = 0;
2174 rptr = sbuf_j;
2175 cols = sbuf_j + rend-rstart + 1;
2176 vals = sbuf_a;
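 /* sbuf_j = [ row pointer rptr[0..rend-rstart] | global column indices of each local row ];
    sbuf_a holds the matching numerical values. Within a row, the off-diagonal entries with
    global column < cstart come first, then the diagonal block, then the off-diagonal entries
    with global column >= cend, so each packed row is sorted by global column index. */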
2177 rptr[0] = 0; 2178 for (i=0; i<rend-rstart; i++){ 2179 row = i + rstart; 2180 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2181 ncols = nzA + nzB; 2182 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2183 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2184 /* load the column indices for this row into cols */ 2185 lwrite = 0; 2186 for (l=0; l<nzB; l++) { 2187 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2188 vals[lwrite] = aworkB[l]; 2189 cols[lwrite++] = ctmp; 2190 } 2191 } 2192 for (l=0; l<nzA; l++){ 2193 vals[lwrite] = aworkA[l]; 2194 cols[lwrite++] = cstart + cworkA[l]; 2195 } 2196 for (l=0; l<nzB; l++) { 2197 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2198 vals[lwrite] = aworkB[l]; 2199 cols[lwrite++] = ctmp; 2200 } 2201 } 2202 vals += ncols; 2203 cols += ncols; 2204 rptr[i+1] = rptr[i] + ncols; 2205 if (rownz_max < ncols) rownz_max = ncols; 2206 } 2207 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2208 } else { /* only copy matrix values into sbuf_a */ 2209 rptr = sbuf_j; 2210 vals = sbuf_a; 2211 rptr[0] = 0; 2212 for (i=0; i<rend-rstart; i++){ 2213 row = i + rstart; 2214 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2215 ncols = nzA + nzB; 2216 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2217 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2218 lwrite = 0; 2219 for (l=0; l<nzB; l++) { 2220 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2221 } 2222 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2223 for (l=0; l<nzB; l++) { 2224 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2225 } 2226 vals += ncols; 2227 rptr[i+1] = rptr[i] + ncols; 2228 } 2229 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2230 2231 /* send nzlocal to others, and recv other's nzlocal */ 2232 /*--------------------------------------------------*/ 2233 if (reuse == MAT_INITIAL_MATRIX){ 2234 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2235 s_waits2 = s_waits3 + nsends; 2236 s_waits1 = s_waits2 + nsends; 2237 r_waits1 = s_waits1 + nsends; 2238 r_waits2 = r_waits1 + nrecvs; 2239 r_waits3 = r_waits2 + nrecvs; 2240 } else { 2241 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2242 r_waits3 = s_waits3 + nsends; 2243 } 2244 2245 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2246 if (reuse == MAT_INITIAL_MATRIX){ 2247 /* get new tags to keep the communication clean */ 2248 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2249 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2250 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2251 rbuf_nz = sbuf_nz + nsends; 2252 2253 /* post receives of other's nzlocal */ 2254 for (i=0; i<nrecvs; i++){ 2255 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2256 } 2257 /* send nzlocal to others */ 2258 for (i=0; i<nsends; i++){ 2259 sbuf_nz[i] = nzlocal; 2260 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2261 } 2262 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2263 count = nrecvs; 2264 while (count) { 2265 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2266 recv_rank[imdex] = recv_status.MPI_SOURCE; 2267 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 
2268 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2269 2270 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2271 rbuf_nz[imdex] += i + 2; 2272 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2273 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2274 count--; 2275 } 2276 /* wait on sends of nzlocal */ 2277 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2278 /* send mat->i,j to others, and recv from other's */ 2279 /*------------------------------------------------*/ 2280 for (i=0; i<nsends; i++){ 2281 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2282 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2283 } 2284 /* wait on receives of mat->i,j */ 2285 /*------------------------------*/ 2286 count = nrecvs; 2287 while (count) { 2288 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2289 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2290 count--; 2291 } 2292 /* wait on sends of mat->i,j */ 2293 /*---------------------------*/ 2294 if (nsends) { 2295 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2296 } 2297 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2298 2299 /* post receives, send and receive mat->a */ 2300 /*----------------------------------------*/ 2301 for (imdex=0; imdex<nrecvs; imdex++) { 2302 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2303 } 2304 for (i=0; i<nsends; i++){ 2305 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2306 } 2307 count = nrecvs; 2308 while (count) { 2309 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2310 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2311 count--; 2312 } 2313 if (nsends) { 2314 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2315 } 2316 2317 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2318 2319 /* create redundant matrix */ 2320 /*-------------------------*/ 2321 if (reuse == MAT_INITIAL_MATRIX){ 2322 /* compute rownz_max for preallocation */ 2323 for (imdex=0; imdex<nrecvs; imdex++){ 2324 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2325 rptr = rbuf_j[imdex]; 2326 for (i=0; i<j; i++){ 2327 ncols = rptr[i+1] - rptr[i]; 2328 if (rownz_max < ncols) rownz_max = ncols; 2329 } 2330 } 2331 2332 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2333 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2334 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2335 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2336 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2337 } else { 2338 C = *matredundant; 2339 } 2340 2341 /* insert local matrix entries */ 2342 rptr = sbuf_j; 2343 cols = sbuf_j + rend-rstart + 1; 2344 vals = sbuf_a; 2345 for (i=0; i<rend-rstart; i++){ 2346 row = i + rstart; 2347 ncols = rptr[i+1] - rptr[i]; 2348 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2349 vals += ncols; 2350 cols += ncols; 2351 } 2352 /* insert received matrix entries */ 2353 for 
(imdex=0; imdex<nrecvs; imdex++){ 2354 rstart = rowrange[recv_rank[imdex]]; 2355 rend = rowrange[recv_rank[imdex]+1]; 2356 rptr = rbuf_j[imdex]; 2357 cols = rbuf_j[imdex] + rend-rstart + 1; 2358 vals = rbuf_a[imdex]; 2359 for (i=0; i<rend-rstart; i++){ 2360 row = i + rstart; 2361 ncols = rptr[i+1] - rptr[i]; 2362 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2363 vals += ncols; 2364 cols += ncols; 2365 } 2366 } 2367 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2368 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2369 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2370 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2371 if (reuse == MAT_INITIAL_MATRIX){ 2372 PetscContainer container; 2373 *matredundant = C; 2374 /* create a supporting struct and attach it to C for reuse */ 2375 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2376 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2377 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2378 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2379 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2380 2381 redund->nzlocal = nzlocal; 2382 redund->nsends = nsends; 2383 redund->nrecvs = nrecvs; 2384 redund->send_rank = send_rank; 2385 redund->sbuf_nz = sbuf_nz; 2386 redund->sbuf_j = sbuf_j; 2387 redund->sbuf_a = sbuf_a; 2388 redund->rbuf_j = rbuf_j; 2389 redund->rbuf_a = rbuf_a; 2390 2391 redund->MatDestroy = C->ops->destroy; 2392 C->ops->destroy = MatDestroy_MatRedundant; 2393 } 2394 PetscFunctionReturn(0); 2395 } 2396 2397 #undef __FUNCT__ 2398 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2399 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2400 { 2401 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2402 PetscErrorCode ierr; 2403 PetscInt i,*idxb = 0; 2404 PetscScalar *va,*vb; 2405 Vec vtmp; 2406 2407 PetscFunctionBegin; 2408 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2409 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2410 if (idx) { 2411 for (i=0; i<A->rmap->n; i++) { 2412 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2413 } 2414 } 2415 2416 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2417 if (idx) { 2418 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2419 } 2420 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2421 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2422 2423 for (i=0; i<A->rmap->n; i++){ 2424 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2425 va[i] = vb[i]; 2426 if (idx) idx[i] = a->garray[idxb[i]]; 2427 } 2428 } 2429 2430 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2431 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2432 if (idxb) { 2433 ierr = PetscFree(idxb);CHKERRQ(ierr); 2434 } 2435 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2436 PetscFunctionReturn(0); 2437 } 2438 2439 #undef __FUNCT__ 2440 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2441 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2442 { 2443 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2444 PetscErrorCode ierr; 2445 PetscInt i,*idxb = 0; 2446 PetscScalar *va,*vb; 2447 Vec vtmp; 2448 2449 PetscFunctionBegin; 2450 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2451 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2452 if (idx) { 2453 for (i=0; i<A->cmap->n; i++) { 2454 if (PetscAbsScalar(va[i])) idx[i] += 
A->cmap->rstart; 2455 } 2456 } 2457 2458 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2459 if (idx) { 2460 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2461 } 2462 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2463 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2464 2465 for (i=0; i<A->rmap->n; i++){ 2466 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2467 va[i] = vb[i]; 2468 if (idx) idx[i] = a->garray[idxb[i]]; 2469 } 2470 } 2471 2472 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2473 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2474 if (idxb) { 2475 ierr = PetscFree(idxb);CHKERRQ(ierr); 2476 } 2477 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2483 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2484 { 2485 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2486 PetscInt n = A->rmap->n; 2487 PetscInt cstart = A->cmap->rstart; 2488 PetscInt *cmap = mat->garray; 2489 PetscInt *diagIdx, *offdiagIdx; 2490 Vec diagV, offdiagV; 2491 PetscScalar *a, *diagA, *offdiagA; 2492 PetscInt r; 2493 PetscErrorCode ierr; 2494 2495 PetscFunctionBegin; 2496 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2497 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2498 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2499 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2500 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2501 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2502 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2503 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2504 for(r = 0; r < n; ++r) { 2505 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2506 a[r] = diagA[r]; 2507 idx[r] = cstart + diagIdx[r]; 2508 } else { 2509 a[r] = offdiagA[r]; 2510 idx[r] = cmap[offdiagIdx[r]]; 2511 } 2512 } 2513 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2514 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2515 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2516 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2517 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2518 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2519 PetscFunctionReturn(0); 2520 } 2521 2522 #undef __FUNCT__ 2523 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2524 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2525 { 2526 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2527 PetscInt n = A->rmap->n; 2528 PetscInt cstart = A->cmap->rstart; 2529 PetscInt *cmap = mat->garray; 2530 PetscInt *diagIdx, *offdiagIdx; 2531 Vec diagV, offdiagV; 2532 PetscScalar *a, *diagA, *offdiagA; 2533 PetscInt r; 2534 PetscErrorCode ierr; 2535 2536 PetscFunctionBegin; 2537 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2538 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2539 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2540 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2541 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2542 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2543 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2544 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2545 for(r = 0; r < n; ++r) { 2546 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2547 a[r] = diagA[r]; 2548 idx[r] = cstart + diagIdx[r]; 2549 } else { 2550 a[r] = 
offdiagA[r]; 2551 idx[r] = cmap[offdiagIdx[r]]; 2552 } 2553 } 2554 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2555 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2556 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2557 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2558 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2559 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2560 PetscFunctionReturn(0); 2561 } 2562 2563 #undef __FUNCT__ 2564 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2565 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2566 { 2567 PetscErrorCode ierr; 2568 Mat *dummy; 2569 2570 PetscFunctionBegin; 2571 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2572 *newmat = *dummy; 2573 ierr = PetscFree(dummy);CHKERRQ(ierr); 2574 PetscFunctionReturn(0); 2575 } 2576 2577 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2578 /* -------------------------------------------------------------------*/ 2579 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2580 MatGetRow_MPIAIJ, 2581 MatRestoreRow_MPIAIJ, 2582 MatMult_MPIAIJ, 2583 /* 4*/ MatMultAdd_MPIAIJ, 2584 MatMultTranspose_MPIAIJ, 2585 MatMultTransposeAdd_MPIAIJ, 2586 #ifdef PETSC_HAVE_PBGL 2587 MatSolve_MPIAIJ, 2588 #else 2589 0, 2590 #endif 2591 0, 2592 0, 2593 /*10*/ 0, 2594 0, 2595 0, 2596 MatSOR_MPIAIJ, 2597 MatTranspose_MPIAIJ, 2598 /*15*/ MatGetInfo_MPIAIJ, 2599 MatEqual_MPIAIJ, 2600 MatGetDiagonal_MPIAIJ, 2601 MatDiagonalScale_MPIAIJ, 2602 MatNorm_MPIAIJ, 2603 /*20*/ MatAssemblyBegin_MPIAIJ, 2604 MatAssemblyEnd_MPIAIJ, 2605 MatSetOption_MPIAIJ, 2606 MatZeroEntries_MPIAIJ, 2607 /*24*/ MatZeroRows_MPIAIJ, 2608 0, 2609 #ifdef PETSC_HAVE_PBGL 2610 0, 2611 #else 2612 0, 2613 #endif 2614 0, 2615 0, 2616 /*29*/ MatSetUpPreallocation_MPIAIJ, 2617 #ifdef PETSC_HAVE_PBGL 2618 0, 2619 #else 2620 0, 2621 #endif 2622 0, 2623 0, 2624 0, 2625 /*34*/ MatDuplicate_MPIAIJ, 2626 0, 2627 0, 2628 0, 2629 0, 2630 /*39*/ MatAXPY_MPIAIJ, 2631 MatGetSubMatrices_MPIAIJ, 2632 MatIncreaseOverlap_MPIAIJ, 2633 MatGetValues_MPIAIJ, 2634 MatCopy_MPIAIJ, 2635 /*44*/ MatGetRowMax_MPIAIJ, 2636 MatScale_MPIAIJ, 2637 0, 2638 0, 2639 0, 2640 /*49*/ MatSetBlockSize_MPIAIJ, 2641 0, 2642 0, 2643 0, 2644 0, 2645 /*54*/ MatFDColoringCreate_MPIAIJ, 2646 0, 2647 MatSetUnfactored_MPIAIJ, 2648 MatPermute_MPIAIJ, 2649 0, 2650 /*59*/ MatGetSubMatrix_MPIAIJ, 2651 MatDestroy_MPIAIJ, 2652 MatView_MPIAIJ, 2653 0, 2654 0, 2655 /*64*/ 0, 2656 0, 2657 0, 2658 0, 2659 0, 2660 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2661 MatGetRowMinAbs_MPIAIJ, 2662 0, 2663 MatSetColoring_MPIAIJ, 2664 #if defined(PETSC_HAVE_ADIC) 2665 MatSetValuesAdic_MPIAIJ, 2666 #else 2667 0, 2668 #endif 2669 MatSetValuesAdifor_MPIAIJ, 2670 /*75*/ MatFDColoringApply_AIJ, 2671 0, 2672 0, 2673 0, 2674 0, 2675 /*80*/ 0, 2676 0, 2677 0, 2678 /*83*/ MatLoad_MPIAIJ, 2679 0, 2680 0, 2681 0, 2682 0, 2683 0, 2684 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2685 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2686 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2687 MatPtAP_Basic, 2688 MatPtAPSymbolic_MPIAIJ, 2689 /*94*/ MatPtAPNumeric_MPIAIJ, 2690 0, 2691 0, 2692 0, 2693 0, 2694 /*99*/ 0, 2695 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2696 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2697 MatConjugate_MPIAIJ, 2698 0, 2699 /*104*/MatSetValuesRow_MPIAIJ, 2700 MatRealPart_MPIAIJ, 2701 MatImaginaryPart_MPIAIJ, 2702 0, 2703 0, 2704 /*109*/0, 2705 MatGetRedundantMatrix_MPIAIJ, 2706 MatGetRowMin_MPIAIJ, 2707 0, 2708 0, 2709 
/*114*/MatGetSeqNonzerostructure_MPIAIJ, 2710 0, 2711 0, 2712 0, 2713 0, 2714 0 2715 }; 2716 2717 /* ----------------------------------------------------------------------------------------*/ 2718 2719 EXTERN_C_BEGIN 2720 #undef __FUNCT__ 2721 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2722 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2723 { 2724 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2725 PetscErrorCode ierr; 2726 2727 PetscFunctionBegin; 2728 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2729 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2730 PetscFunctionReturn(0); 2731 } 2732 EXTERN_C_END 2733 2734 EXTERN_C_BEGIN 2735 #undef __FUNCT__ 2736 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2737 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2738 { 2739 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2740 PetscErrorCode ierr; 2741 2742 PetscFunctionBegin; 2743 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2744 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2745 PetscFunctionReturn(0); 2746 } 2747 EXTERN_C_END 2748 2749 #include "petscpc.h" 2750 EXTERN_C_BEGIN 2751 #undef __FUNCT__ 2752 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2753 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2754 { 2755 Mat_MPIAIJ *b; 2756 PetscErrorCode ierr; 2757 PetscInt i; 2758 2759 PetscFunctionBegin; 2760 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2761 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2762 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2763 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2764 2765 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2766 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2767 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2768 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2769 if (d_nnz) { 2770 for (i=0; i<B->rmap->n; i++) { 2771 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2772 } 2773 } 2774 if (o_nnz) { 2775 for (i=0; i<B->rmap->n; i++) { 2776 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2777 } 2778 } 2779 b = (Mat_MPIAIJ*)B->data; 2780 2781 if (!B->preallocated) { 2782 /* Explicitly create 2 MATSEQAIJ matrices. 
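      b->A is the local "diagonal" block of size m x n (rows and columns owned by this process)
      and b->B is the m x N "off-diagonal" block, whose columns are later compressed to the
      nonzero ones (see the column-compression note in MatCopy_MPIAIJ() above).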
*/ 2783 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2784 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2785 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2786 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2787 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2788 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2789 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2790 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2791 } 2792 2793 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2794 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2795 B->preallocated = PETSC_TRUE; 2796 PetscFunctionReturn(0); 2797 } 2798 EXTERN_C_END 2799 2800 #undef __FUNCT__ 2801 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2802 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2803 { 2804 Mat mat; 2805 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2806 PetscErrorCode ierr; 2807 2808 PetscFunctionBegin; 2809 *newmat = 0; 2810 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2811 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2812 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2813 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2814 a = (Mat_MPIAIJ*)mat->data; 2815 2816 mat->factor = matin->factor; 2817 mat->rmap->bs = matin->rmap->bs; 2818 mat->assembled = PETSC_TRUE; 2819 mat->insertmode = NOT_SET_VALUES; 2820 mat->preallocated = PETSC_TRUE; 2821 2822 a->size = oldmat->size; 2823 a->rank = oldmat->rank; 2824 a->donotstash = oldmat->donotstash; 2825 a->roworiented = oldmat->roworiented; 2826 a->rowindices = 0; 2827 a->rowvalues = 0; 2828 a->getrowactive = PETSC_FALSE; 2829 2830 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2831 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2832 2833 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2834 if (oldmat->colmap) { 2835 #if defined (PETSC_USE_CTABLE) 2836 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2837 #else 2838 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2839 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2840 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2841 #endif 2842 } else a->colmap = 0; 2843 if (oldmat->garray) { 2844 PetscInt len; 2845 len = oldmat->B->cmap->n; 2846 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2847 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2848 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2849 } else a->garray = 0; 2850 2851 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2852 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2853 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2854 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2855 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2856 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2857 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2858 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2859 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2860 *newmat = mat; 2861 
PetscFunctionReturn(0); 2862 } 2863 2864 #include "petscsys.h" 2865 2866 #undef __FUNCT__ 2867 #define __FUNCT__ "MatLoad_MPIAIJ" 2868 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2869 { 2870 Mat A; 2871 PetscScalar *vals,*svals; 2872 MPI_Comm comm = ((PetscObject)viewer)->comm; 2873 MPI_Status status; 2874 PetscErrorCode ierr; 2875 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2876 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2877 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2878 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2879 PetscInt cend,cstart,n,*rowners; 2880 int fd; 2881 2882 PetscFunctionBegin; 2883 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2884 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2885 if (!rank) { 2886 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2887 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2888 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2889 } 2890 2891 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2892 M = header[1]; N = header[2]; 2893 /* determine ownership of all rows */ 2894 m = M/size + ((M % size) > rank); 2895 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2896 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2897 2898 /* First process needs enough room for process with most rows */ 2899 if (!rank) { 2900 mmax = rowners[1]; 2901 for (i=2; i<size; i++) { 2902 mmax = PetscMax(mmax,rowners[i]); 2903 } 2904 } else mmax = m; 2905 2906 rowners[0] = 0; 2907 for (i=2; i<=size; i++) { 2908 rowners[i] += rowners[i-1]; 2909 } 2910 rstart = rowners[rank]; 2911 rend = rowners[rank+1]; 2912 2913 /* distribute row lengths to all processors */ 2914 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2915 if (!rank) { 2916 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2917 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2918 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2919 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2920 for (j=0; j<m; j++) { 2921 procsnz[0] += ourlens[j]; 2922 } 2923 for (i=1; i<size; i++) { 2924 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2925 /* calculate the number of nonzeros on each processor */ 2926 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2927 procsnz[i] += rowlengths[j]; 2928 } 2929 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2930 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2931 } 2932 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2933 } else { 2934 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2935 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2936 } 2937 2938 if (!rank) { 2939 /* determine max buffer needed and allocate it */ 2940 maxnz = 0; 2941 for (i=0; i<size; i++) { 2942 maxnz = PetscMax(maxnz,procsnz[i]); 2943 } 2944 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2945 2946 /* read in my part of the matrix column indices */ 2947 nz = procsnz[0]; 2948 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2949 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2950 2951 /* read in every one elses and ship off */ 2952 for (i=1; i<size; i++) { 2953 nz = procsnz[i]; 2954 ierr = 
PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2955 mpicnt = PetscMPIIntCast(nz); 2956 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2957 } 2958 ierr = PetscFree(cols);CHKERRQ(ierr); 2959 } else { 2960 /* determine buffer space needed for message */ 2961 nz = 0; 2962 for (i=0; i<m; i++) { 2963 nz += ourlens[i]; 2964 } 2965 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2966 2967 /* receive message of column indices*/ 2968 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2969 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2970 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2971 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2972 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2973 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2974 } 2975 2976 /* determine column ownership if matrix is not square */ 2977 if (N != M) { 2978 n = N/size + ((N % size) > rank); 2979 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2980 cstart = cend - n; 2981 } else { 2982 cstart = rstart; 2983 cend = rend; 2984 n = cend - cstart; 2985 } 2986 2987 /* loop over local rows, determining number of off diagonal entries */ 2988 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2989 jj = 0; 2990 for (i=0; i<m; i++) { 2991 for (j=0; j<ourlens[i]; j++) { 2992 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2993 jj++; 2994 } 2995 } 2996 2997 /* create our matrix */ 2998 for (i=0; i<m; i++) { 2999 ourlens[i] -= offlens[i]; 3000 } 3001 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 3002 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 3003 ierr = MatSetType(A,type);CHKERRQ(ierr); 3004 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 3005 3006 for (i=0; i<m; i++) { 3007 ourlens[i] += offlens[i]; 3008 } 3009 3010 if (!rank) { 3011 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3012 3013 /* read in my part of the matrix numerical values */ 3014 nz = procsnz[0]; 3015 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3016 3017 /* insert into matrix */ 3018 jj = rstart; 3019 smycols = mycols; 3020 svals = vals; 3021 for (i=0; i<m; i++) { 3022 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3023 smycols += ourlens[i]; 3024 svals += ourlens[i]; 3025 jj++; 3026 } 3027 3028 /* read in other processors and ship out */ 3029 for (i=1; i<size; i++) { 3030 nz = procsnz[i]; 3031 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3032 mpicnt = PetscMPIIntCast(nz); 3033 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3034 } 3035 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3036 } else { 3037 /* receive numeric values */ 3038 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3039 3040 /* receive message of values*/ 3041 mpicnt = PetscMPIIntCast(nz); 3042 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3043 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3044 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 3045 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible 
negative value %d, expected %d",mpimaxnz,mpicnt);} 3046 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3047 3048 /* insert into matrix */ 3049 jj = rstart; 3050 smycols = mycols; 3051 svals = vals; 3052 for (i=0; i<m; i++) { 3053 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3054 smycols += ourlens[i]; 3055 svals += ourlens[i]; 3056 jj++; 3057 } 3058 } 3059 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3060 ierr = PetscFree(vals);CHKERRQ(ierr); 3061 ierr = PetscFree(mycols);CHKERRQ(ierr); 3062 ierr = PetscFree(rowners);CHKERRQ(ierr); 3063 3064 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3065 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3066 *newmat = A; 3067 PetscFunctionReturn(0); 3068 } 3069 3070 #undef __FUNCT__ 3071 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3072 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3073 { 3074 PetscErrorCode ierr; 3075 IS iscol_local; 3076 PetscInt csize; 3077 3078 PetscFunctionBegin; 3079 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3080 if (call == MAT_REUSE_MATRIX) { 3081 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3082 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3083 } else { 3084 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3085 } 3086 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3087 if (call == MAT_INITIAL_MATRIX) { 3088 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3089 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3090 } 3091 PetscFunctionReturn(0); 3092 } 3093 3094 #undef __FUNCT__ 3095 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3096 /* 3097 Not great since it makes two copies of the submatrix, first an SeqAIJ 3098 in local and then by concatenating the local matrices the end result. 3099 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3100 3101 Note: This requires a sequential iscol with all indices. 
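   In outline: MatGetSubMatrices() below first pulls the requested rows/columns onto each
   process as a sequential AIJ matrix (Mreuse); the diagonal/off-diagonal nonzero counts of
   that local piece are then used to preallocate the new parallel matrix, and its rows are
   fed into the result with MatSetValues_MPIAIJ().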
3102 */ 3103 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3104 { 3105 PetscErrorCode ierr; 3106 PetscMPIInt rank,size; 3107 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3108 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3109 Mat *local,M,Mreuse; 3110 MatScalar *vwork,*aa; 3111 MPI_Comm comm = ((PetscObject)mat)->comm; 3112 Mat_SeqAIJ *aij; 3113 3114 3115 PetscFunctionBegin; 3116 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3117 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3118 3119 if (call == MAT_REUSE_MATRIX) { 3120 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3121 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3122 local = &Mreuse; 3123 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3124 } else { 3125 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3126 Mreuse = *local; 3127 ierr = PetscFree(local);CHKERRQ(ierr); 3128 } 3129 3130 /* 3131 m - number of local rows 3132 n - number of columns (same on all processors) 3133 rstart - first row in new global matrix generated 3134 */ 3135 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3136 if (call == MAT_INITIAL_MATRIX) { 3137 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3138 ii = aij->i; 3139 jj = aij->j; 3140 3141 /* 3142 Determine the number of non-zeros in the diagonal and off-diagonal 3143 portions of the matrix in order to do correct preallocation 3144 */ 3145 3146 /* first get start and end of "diagonal" columns */ 3147 if (csize == PETSC_DECIDE) { 3148 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3149 if (mglobal == n) { /* square matrix */ 3150 nlocal = m; 3151 } else { 3152 nlocal = n/size + ((n % size) > rank); 3153 } 3154 } else { 3155 nlocal = csize; 3156 } 3157 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3158 rstart = rend - nlocal; 3159 if (rank == size - 1 && rend != n) { 3160 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3161 } 3162 3163 /* next, compute all the lengths */ 3164 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3165 olens = dlens + m; 3166 for (i=0; i<m; i++) { 3167 jend = ii[i+1] - ii[i]; 3168 olen = 0; 3169 dlen = 0; 3170 for (j=0; j<jend; j++) { 3171 if (*jj < rstart || *jj >= rend) olen++; 3172 else dlen++; 3173 jj++; 3174 } 3175 olens[i] = olen; 3176 dlens[i] = dlen; 3177 } 3178 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3179 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3180 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3181 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3182 ierr = PetscFree(dlens);CHKERRQ(ierr); 3183 } else { 3184 PetscInt ml,nl; 3185 3186 M = *newmat; 3187 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3188 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3189 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3190 /* 3191 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3192 rather than the slower MatSetValues(). 
3193 */ 3194 M->was_assembled = PETSC_TRUE; 3195 M->assembled = PETSC_FALSE; 3196 } 3197 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3198 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3199 ii = aij->i; 3200 jj = aij->j; 3201 aa = aij->a; 3202 for (i=0; i<m; i++) { 3203 row = rstart + i; 3204 nz = ii[i+1] - ii[i]; 3205 cwork = jj; jj += nz; 3206 vwork = aa; aa += nz; 3207 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3208 } 3209 3210 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3211 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3212 *newmat = M; 3213 3214 /* save submatrix used in processor for next request */ 3215 if (call == MAT_INITIAL_MATRIX) { 3216 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3217 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3218 } 3219 3220 PetscFunctionReturn(0); 3221 } 3222 3223 EXTERN_C_BEGIN 3224 #undef __FUNCT__ 3225 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3226 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3227 { 3228 PetscInt m,cstart, cend,j,nnz,i,d; 3229 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3230 const PetscInt *JJ; 3231 PetscScalar *values; 3232 PetscErrorCode ierr; 3233 3234 PetscFunctionBegin; 3235 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3236 3237 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3238 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3239 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3240 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3241 m = B->rmap->n; 3242 cstart = B->cmap->rstart; 3243 cend = B->cmap->rend; 3244 rstart = B->rmap->rstart; 3245 3246 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3247 o_nnz = d_nnz + m; 3248 3249 #if defined(PETSC_USE_DEBUG) 3250 for (i=0; i<m; i++) { 3251 nnz = Ii[i+1]- Ii[i]; 3252 JJ = J + Ii[i]; 3253 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3254 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3255 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3256 for (j=1; j<nnz; j++) { 3257 if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3258 } 3259 } 3260 #endif 3261 3262 for (i=0; i<m; i++) { 3263 nnz = Ii[i+1]- Ii[i]; 3264 JJ = J + Ii[i]; 3265 nnz_max = PetscMax(nnz_max,nnz); 3266 for (j=0; j<nnz; j++) { 3267 if (*JJ >= cstart) break; 3268 JJ++; 3269 } 3270 d = 0; 3271 for (; j<nnz; j++) { 3272 if (*JJ++ >= cend) break; 3273 d++; 3274 } 3275 d_nnz[i] = d; 3276 o_nnz[i] = nnz - d; 3277 } 3278 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3279 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3280 3281 if (v) values = (PetscScalar*)v; 3282 else { 3283 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3284 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3285 } 3286 3287 for (i=0; i<m; i++) { 3288 ii = i + rstart; 3289 nnz = Ii[i+1]- Ii[i]; 3290 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3291 } 3292 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3293 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3294 3295 if (!v) { 3296 ierr = PetscFree(values);CHKERRQ(ierr); 3297 } 3298 PetscFunctionReturn(0); 3299 } 3300 EXTERN_C_END 3301 3302 #undef __FUNCT__ 3303 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3304 /*@ 3305 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3306 (the default parallel PETSc format). 3307 3308 Collective on MPI_Comm 3309 3310 Input Parameters: 3311 + B - the matrix 3312 . i - the indices into j for the start of each local row (starts with zero) 3313 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3314 - v - optional values in the matrix 3315 3316 Level: developer 3317 3318 Notes: 3319 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3320 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3321 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3322 3323 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3324 3325 The format which is used for the sparse matrix input, is equivalent to a 3326 row-major ordering.. i.e for the following matrix, the input data expected is 3327 as shown: 3328 3329 1 0 0 3330 2 0 3 P0 3331 ------- 3332 4 5 6 P1 3333 3334 Process0 [P0]: rows_owned=[0,1] 3335 i = {0,1,3} [size = nrow+1 = 2+1] 3336 j = {0,0,2} [size = nz = 6] 3337 v = {1,2,3} [size = nz = 6] 3338 3339 Process1 [P1]: rows_owned=[2] 3340 i = {0,3} [size = nrow+1 = 1+1] 3341 j = {0,1,2} [size = nz = 6] 3342 v = {4,5,6} [size = nz = 6] 3343 3344 The column indices for each row MUST be sorted. 3345 3346 .keywords: matrix, aij, compressed row, sparse, parallel 3347 3348 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3349 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3350 @*/ 3351 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3352 { 3353 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3354 3355 PetscFunctionBegin; 3356 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3357 if (f) { 3358 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3359 } 3360 PetscFunctionReturn(0); 3361 } 3362 3363 #undef __FUNCT__ 3364 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3365 /*@C 3366 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3367 (the default parallel PETSc format). For good matrix assembly performance 3368 the user should preallocate the matrix storage by setting the parameters 3369 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3370 performance can be increased by more than a factor of 50. 3371 3372 Collective on MPI_Comm 3373 3374 Input Parameters: 3375 + A - the matrix 3376 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3377 (same value is used for all local rows) 3378 . d_nnz - array containing the number of nonzeros in the various rows of the 3379 DIAGONAL portion of the local submatrix (possibly different for each row) 3380 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
3381 The size of this array is equal to the number of local rows, i.e 'm'. 3382 You must leave room for the diagonal entry even if it is zero. 3383 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3384 submatrix (same value is used for all local rows). 3385 - o_nnz - array containing the number of nonzeros in the various rows of the 3386 OFF-DIAGONAL portion of the local submatrix (possibly different for 3387 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3388 structure. The size of this array is equal to the number 3389 of local rows, i.e 'm'. 3390 3391 If the *_nnz parameter is given then the *_nz parameter is ignored 3392 3393 The AIJ format (also called the Yale sparse matrix format or 3394 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3395 storage. The stored row and column indices begin with zero. See the users manual for details. 3396 3397 The parallel matrix is partitioned such that the first m0 rows belong to 3398 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3399 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3400 3401 The DIAGONAL portion of the local submatrix of a processor can be defined 3402 as the submatrix which is obtained by extraction the part corresponding 3403 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3404 first row that belongs to the processor, and r2 is the last row belonging 3405 to the this processor. This is a square mxm matrix. The remaining portion 3406 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3407 3408 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3409 3410 You can call MatGetInfo() to get information on how effective the preallocation was; 3411 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3412 You can also run with the option -info and look for messages with the string 3413 malloc in them to see if additional memory allocation was needed. 3414 3415 Example usage: 3416 3417 Consider the following 8x8 matrix with 34 non-zero values, that is 3418 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3419 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3420 as follows: 3421 3422 .vb 3423 1 2 0 | 0 3 0 | 0 4 3424 Proc0 0 5 6 | 7 0 0 | 8 0 3425 9 0 10 | 11 0 0 | 12 0 3426 ------------------------------------- 3427 13 0 14 | 15 16 17 | 0 0 3428 Proc1 0 18 0 | 19 20 21 | 0 0 3429 0 0 0 | 22 23 0 | 24 0 3430 ------------------------------------- 3431 Proc2 25 26 27 | 0 0 28 | 29 0 3432 30 0 0 | 31 32 33 | 0 34 3433 .ve 3434 3435 This can be represented as a collection of submatrices as: 3436 3437 .vb 3438 A B C 3439 D E F 3440 G H I 3441 .ve 3442 3443 Where the submatrices A,B,C are owned by proc0, D,E,F are 3444 owned by proc1, G,H,I are owned by proc2. 3445 3446 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3447 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3448 The 'M','N' parameters are 8,8, and have the same values on all procs. 3449 3450 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3451 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3452 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3453 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3454 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3455 matrix, ans [DF] as another SeqAIJ matrix. 
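   As a concrete illustration (a sketch only; the d_nnz/o_nnz values are the
   per-row counts derived for this layout further below), proc1 could
   preallocate its three rows of the 8x8 example with
.vb
      PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve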
3456 3457 When d_nz, o_nz parameters are specified, d_nz storage elements are 3458 allocated for every row of the local diagonal submatrix, and o_nz 3459 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3460 One way to choose d_nz and o_nz is to use the max nonzerors per local 3461 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3462 In this case, the values of d_nz,o_nz are: 3463 .vb 3464 proc0 : dnz = 2, o_nz = 2 3465 proc1 : dnz = 3, o_nz = 2 3466 proc2 : dnz = 1, o_nz = 4 3467 .ve 3468 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3469 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3470 for proc3. i.e we are using 12+15+10=37 storage locations to store 3471 34 values. 3472 3473 When d_nnz, o_nnz parameters are specified, the storage is specified 3474 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3475 In the above case the values for d_nnz,o_nnz are: 3476 .vb 3477 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3478 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3479 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3480 .ve 3481 Here the space allocated is sum of all the above values i.e 34, and 3482 hence pre-allocation is perfect. 3483 3484 Level: intermediate 3485 3486 .keywords: matrix, aij, compressed row, sparse, parallel 3487 3488 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3489 MPIAIJ, MatGetInfo() 3490 @*/ 3491 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3492 { 3493 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3494 3495 PetscFunctionBegin; 3496 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3497 if (f) { 3498 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3499 } 3500 PetscFunctionReturn(0); 3501 } 3502 3503 #undef __FUNCT__ 3504 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3505 /*@ 3506 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3507 CSR format the local rows. 3508 3509 Collective on MPI_Comm 3510 3511 Input Parameters: 3512 + comm - MPI communicator 3513 . m - number of local rows (Cannot be PETSC_DECIDE) 3514 . n - This value should be the same as the local size used in creating the 3515 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3516 calculated if N is given) For square matrices n is almost always m. 3517 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3518 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3519 . i - row indices 3520 . j - column indices 3521 - a - matrix values 3522 3523 Output Parameter: 3524 . mat - the matrix 3525 3526 Level: intermediate 3527 3528 Notes: 3529 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3530 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3531 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3532 3533 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3534 3535 The format which is used for the sparse matrix input, is equivalent to a 3536 row-major ordering.. 
i.e for the following matrix, the input data expected is 3537 as shown: 3538 3539 1 0 0 3540 2 0 3 P0 3541 ------- 3542 4 5 6 P1 3543 3544 Process0 [P0]: rows_owned=[0,1] 3545 i = {0,1,3} [size = nrow+1 = 2+1] 3546 j = {0,0,2} [size = nz = 6] 3547 v = {1,2,3} [size = nz = 6] 3548 3549 Process1 [P1]: rows_owned=[2] 3550 i = {0,3} [size = nrow+1 = 1+1] 3551 j = {0,1,2} [size = nz = 6] 3552 v = {4,5,6} [size = nz = 6] 3553 3554 .keywords: matrix, aij, compressed row, sparse, parallel 3555 3556 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3557 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3558 @*/ 3559 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3560 { 3561 PetscErrorCode ierr; 3562 3563 PetscFunctionBegin; 3564 if (i[0]) { 3565 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3566 } 3567 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3568 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3569 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3570 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3571 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3572 PetscFunctionReturn(0); 3573 } 3574 3575 #undef __FUNCT__ 3576 #define __FUNCT__ "MatCreateMPIAIJ" 3577 /*@C 3578 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3579 (the default parallel PETSc format). For good matrix assembly performance 3580 the user should preallocate the matrix storage by setting the parameters 3581 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3582 performance can be increased by more than a factor of 50. 3583 3584 Collective on MPI_Comm 3585 3586 Input Parameters: 3587 + comm - MPI communicator 3588 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3589 This value should be the same as the local size used in creating the 3590 y vector for the matrix-vector product y = Ax. 3591 . n - This value should be the same as the local size used in creating the 3592 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3593 calculated if N is given) For square matrices n is almost always m. 3594 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3595 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3596 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3597 (same value is used for all local rows) 3598 . d_nnz - array containing the number of nonzeros in the various rows of the 3599 DIAGONAL portion of the local submatrix (possibly different for each row) 3600 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3601 The size of this array is equal to the number of local rows, i.e 'm'. 3602 You must leave room for the diagonal entry even if it is zero. 3603 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3604 submatrix (same value is used for all local rows). 3605 - o_nnz - array containing the number of nonzeros in the various rows of the 3606 OFF-DIAGONAL portion of the local submatrix (possibly different for 3607 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3608 structure. The size of this array is equal to the number 3609 of local rows, i.e 'm'. 
3610 3611 Output Parameter: 3612 . A - the matrix 3613 3614 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3615 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3616 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3617 3618 Notes: 3619 If the *_nnz parameter is given then the *_nz parameter is ignored 3620 3621 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3622 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3623 storage requirements for this matrix. 3624 3625 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3626 processor then it must be used on all processors that share the object for 3627 that argument. 3628 3629 The user MUST specify either the local or global matrix dimensions 3630 (possibly both). 3631 3632 The parallel matrix is partitioned across processors such that the 3633 first m0 rows belong to process 0, the next m1 rows belong to 3634 process 1, the next m2 rows belong to process 2 etc.. where 3635 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3636 values corresponding to [m x N] submatrix. 3637 3638 The columns are logically partitioned with the n0 columns belonging 3639 to 0th partition, the next n1 columns belonging to the next 3640 partition etc.. where n0,n1,n2... are the input parameter 'n'. 3641 3642 The DIAGONAL portion of the local submatrix on any given processor 3643 is the submatrix corresponding to the rows and columns m,n 3644 corresponding to the given processor. i.e diagonal matrix on 3645 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3646 etc. The remaining portion of the local submatrix [m x (N-n)] 3647 constitutes the OFF-DIAGONAL portion. The example below better 3648 illustrates this concept. 3649 3650 For a square global matrix we define each processor's diagonal portion 3651 to be its local rows and the corresponding columns (a square submatrix); 3652 each processor's off-diagonal portion encompasses the remainder of the 3653 local matrix (a rectangular submatrix). 3654 3655 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3656 3657 When calling this routine with a single process communicator, a matrix of 3658 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3659 type of communicator, use the construction mechanism: 3660 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3661 3662 By default, this format uses inodes (identical nodes) when possible. 3663 We search for consecutive rows with the same nonzero structure, thereby 3664 reusing matrix information to achieve increased efficiency. 3665 3666 Options Database Keys: 3667 + -mat_no_inode - Do not use inodes 3668 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3669 - -mat_aij_oneindex - Internally use indexing starting at 1 3670 rather than 0. Note that when calling MatSetValues(), 3671 the user still MUST index entries starting at 0! 3672 3673 3674 Example usage: 3675 3676 Consider the following 8x8 matrix with 34 non-zero values, that is 3677 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3678 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 3679 as follows: 3680 3681 .vb 3682 1 2 0 | 0 3 0 | 0 4 3683 Proc0 0 5 6 | 7 0 0 | 8 0 3684 9 0 10 | 11 0 0 | 12 0 3685 ------------------------------------- 3686 13 0 14 | 15 16 17 | 0 0 3687 Proc1 0 18 0 | 19 20 21 | 0 0 3688 0 0 0 | 22 23 0 | 24 0 3689 ------------------------------------- 3690 Proc2 25 26 27 | 0 0 28 | 29 0 3691 30 0 0 | 31 32 33 | 0 34 3692 .ve 3693 3694 This can be represented as a collection of submatrices as: 3695 3696 .vb 3697 A B C 3698 D E F 3699 G H I 3700 .ve 3701 3702 Where the submatrices A,B,C are owned by proc0, D,E,F are 3703 owned by proc1, G,H,I are owned by proc2. 3704 3705 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3706 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3707 The 'M','N' parameters are 8,8, and have the same values on all procs. 3708 3709 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3710 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3711 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3712 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3713 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3714 matrix, ans [DF] as another SeqAIJ matrix. 3715 3716 When d_nz, o_nz parameters are specified, d_nz storage elements are 3717 allocated for every row of the local diagonal submatrix, and o_nz 3718 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3719 One way to choose d_nz and o_nz is to use the max nonzerors per local 3720 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3721 In this case, the values of d_nz,o_nz are: 3722 .vb 3723 proc0 : dnz = 2, o_nz = 2 3724 proc1 : dnz = 3, o_nz = 2 3725 proc2 : dnz = 1, o_nz = 4 3726 .ve 3727 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3728 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3729 for proc3. i.e we are using 12+15+10=37 storage locations to store 3730 34 values. 3731 3732 When d_nnz, o_nnz parameters are specified, the storage is specified 3733 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3734 In the above case the values for d_nnz,o_nnz are: 3735 .vb 3736 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3737 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3738 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3739 .ve 3740 Here the space allocated is sum of all the above values i.e 34, and 3741 hence pre-allocation is perfect. 
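   A minimal call for the example above, as issued on proc0 (a sketch; every
   process passes its own m, n, d_nnz and o_nnz, error handling abbreviated):
.vb
      Mat      A;
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve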
3742 3743 Level: intermediate 3744 3745 .keywords: matrix, aij, compressed row, sparse, parallel 3746 3747 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3748 MPIAIJ, MatCreateMPIAIJWithArrays() 3749 @*/ 3750 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3751 { 3752 PetscErrorCode ierr; 3753 PetscMPIInt size; 3754 3755 PetscFunctionBegin; 3756 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3757 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3758 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3759 if (size > 1) { 3760 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3761 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3762 } else { 3763 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3764 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3765 } 3766 PetscFunctionReturn(0); 3767 } 3768 3769 #undef __FUNCT__ 3770 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3771 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3772 { 3773 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3774 3775 PetscFunctionBegin; 3776 *Ad = a->A; 3777 *Ao = a->B; 3778 *colmap = a->garray; 3779 PetscFunctionReturn(0); 3780 } 3781 3782 #undef __FUNCT__ 3783 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3784 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3785 { 3786 PetscErrorCode ierr; 3787 PetscInt i; 3788 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3789 3790 PetscFunctionBegin; 3791 if (coloring->ctype == IS_COLORING_GLOBAL) { 3792 ISColoringValue *allcolors,*colors; 3793 ISColoring ocoloring; 3794 3795 /* set coloring for diagonal portion */ 3796 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3797 3798 /* set coloring for off-diagonal portion */ 3799 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3800 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3801 for (i=0; i<a->B->cmap->n; i++) { 3802 colors[i] = allcolors[a->garray[i]]; 3803 } 3804 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3805 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3806 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3807 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3808 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3809 ISColoringValue *colors; 3810 PetscInt *larray; 3811 ISColoring ocoloring; 3812 3813 /* set coloring for diagonal portion */ 3814 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3815 for (i=0; i<a->A->cmap->n; i++) { 3816 larray[i] = i + A->cmap->rstart; 3817 } 3818 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3819 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3820 for (i=0; i<a->A->cmap->n; i++) { 3821 colors[i] = coloring->colors[larray[i]]; 3822 } 3823 ierr = PetscFree(larray);CHKERRQ(ierr); 3824 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3825 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3826 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3827 3828 /* set coloring for off-diagonal portion */ 3829 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3830 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3831 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3832 for (i=0; i<a->B->cmap->n; i++) { 3833 colors[i] = coloring->colors[larray[i]]; 3834 } 3835 ierr = PetscFree(larray);CHKERRQ(ierr); 3836 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3837 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3838 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3839 } else { 3840 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3841 } 3842 3843 PetscFunctionReturn(0); 3844 } 3845 3846 #if defined(PETSC_HAVE_ADIC) 3847 #undef __FUNCT__ 3848 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3849 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3850 { 3851 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3852 PetscErrorCode ierr; 3853 3854 PetscFunctionBegin; 3855 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3856 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3857 PetscFunctionReturn(0); 3858 } 3859 #endif 3860 3861 #undef __FUNCT__ 3862 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3863 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3864 { 3865 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3866 PetscErrorCode ierr; 3867 3868 PetscFunctionBegin; 3869 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3870 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3871 PetscFunctionReturn(0); 3872 } 3873 3874 #undef __FUNCT__ 3875 #define __FUNCT__ "MatMerge" 3876 /*@ 3877 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3878 matrices from each processor 3879 3880 Collective on MPI_Comm 3881 3882 Input Parameters: 3883 + comm - the communicators the parallel matrix will live on 3884 . inmat - the input sequential matrices 3885 . n - number of local columns (or PETSC_DECIDE) 3886 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3887 3888 Output Parameter: 3889 . outmat - the parallel matrix generated 3890 3891 Level: advanced 3892 3893 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
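   The input matrix inmat is destroyed by this routine once its entries have
   been copied into outmat (see the MatDestroy() call in the body below).

   Usage sketch (each process supplies its own sequential piece; error
   handling abbreviated):
.vb
      Mat seqA;   /* this process's SeqAIJ rows, already assembled */
      Mat mpiA;
      ierr = MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
.ve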
3894 3895 @*/ 3896 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3897 { 3898 PetscErrorCode ierr; 3899 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3900 PetscInt *indx; 3901 PetscScalar *values; 3902 3903 PetscFunctionBegin; 3904 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3905 if (scall == MAT_INITIAL_MATRIX){ 3906 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3907 if (n == PETSC_DECIDE){ 3908 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3909 } 3910 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3911 rstart -= m; 3912 3913 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3914 for (i=0;i<m;i++) { 3915 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3916 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3917 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3918 } 3919 /* This routine will ONLY return MPIAIJ type matrix */ 3920 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3921 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3922 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3923 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3924 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3925 3926 } else if (scall == MAT_REUSE_MATRIX){ 3927 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3928 } else { 3929 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3930 } 3931 3932 for (i=0;i<m;i++) { 3933 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3934 Ii = i + rstart; 3935 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3936 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3937 } 3938 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3939 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3940 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3941 3942 PetscFunctionReturn(0); 3943 } 3944 3945 #undef __FUNCT__ 3946 #define __FUNCT__ "MatFileSplit" 3947 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3948 { 3949 PetscErrorCode ierr; 3950 PetscMPIInt rank; 3951 PetscInt m,N,i,rstart,nnz; 3952 size_t len; 3953 const PetscInt *indx; 3954 PetscViewer out; 3955 char *name; 3956 Mat B; 3957 const PetscScalar *values; 3958 3959 PetscFunctionBegin; 3960 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3961 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3962 /* Should this be the type of the diagonal block of A? 
*/ 3963 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3964 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3965 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3966 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3967 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3968 for (i=0;i<m;i++) { 3969 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3970 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3971 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3972 } 3973 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3974 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3975 3976 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3977 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3978 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3979 sprintf(name,"%s.%d",outfile,rank); 3980 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3981 ierr = PetscFree(name); 3982 ierr = MatView(B,out);CHKERRQ(ierr); 3983 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3984 ierr = MatDestroy(B);CHKERRQ(ierr); 3985 PetscFunctionReturn(0); 3986 } 3987 3988 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3989 #undef __FUNCT__ 3990 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3991 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3992 { 3993 PetscErrorCode ierr; 3994 Mat_Merge_SeqsToMPI *merge; 3995 PetscContainer container; 3996 3997 PetscFunctionBegin; 3998 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3999 if (container) { 4000 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4001 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4002 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4003 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4004 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4005 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4006 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4007 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4008 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4009 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4010 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4011 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4012 4013 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4014 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4015 } 4016 ierr = PetscFree(merge);CHKERRQ(ierr); 4017 4018 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4019 PetscFunctionReturn(0); 4020 } 4021 4022 #include "../src/mat/utils/freespace.h" 4023 #include "petscbt.h" 4024 4025 #undef __FUNCT__ 4026 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4027 /*@C 4028 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4029 matrices from each processor 4030 4031 Collective on MPI_Comm 4032 4033 Input Parameters: 4034 + comm - the communicators the parallel matrix will live on 4035 . seqmat - the input sequential matrices 4036 . m - number of local rows (or PETSC_DECIDE) 4037 . n - number of local columns (or PETSC_DECIDE) 4038 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4039 4040 Output Parameter: 4041 . mpimat - the parallel matrix generated 4042 4043 Level: advanced 4044 4045 Notes: 4046 The dimensions of the sequential matrix in each processor MUST be the same. 4047 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4048 destroyed when mpimat is destroyed. 
Call PetscObjectQuery() to access seqmat. 4049 @*/ 4050 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4051 { 4052 PetscErrorCode ierr; 4053 MPI_Comm comm=((PetscObject)mpimat)->comm; 4054 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4055 PetscMPIInt size,rank,taga,*len_s; 4056 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4057 PetscInt proc,m; 4058 PetscInt **buf_ri,**buf_rj; 4059 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4060 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4061 MPI_Request *s_waits,*r_waits; 4062 MPI_Status *status; 4063 MatScalar *aa=a->a; 4064 MatScalar **abuf_r,*ba_i; 4065 Mat_Merge_SeqsToMPI *merge; 4066 PetscContainer container; 4067 4068 PetscFunctionBegin; 4069 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4070 4071 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4072 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4073 4074 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4075 if (container) { 4076 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4077 } 4078 bi = merge->bi; 4079 bj = merge->bj; 4080 buf_ri = merge->buf_ri; 4081 buf_rj = merge->buf_rj; 4082 4083 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4084 owners = merge->rowmap->range; 4085 len_s = merge->len_s; 4086 4087 /* send and recv matrix values */ 4088 /*-----------------------------*/ 4089 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4090 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4091 4092 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4093 for (proc=0,k=0; proc<size; proc++){ 4094 if (!len_s[proc]) continue; 4095 i = owners[proc]; 4096 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4097 k++; 4098 } 4099 4100 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4101 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4102 ierr = PetscFree(status);CHKERRQ(ierr); 4103 4104 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4105 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4106 4107 /* insert mat values of mpimat */ 4108 /*----------------------------*/ 4109 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4110 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4111 nextrow = buf_ri_k + merge->nrecv; 4112 nextai = nextrow + merge->nrecv; 4113 4114 for (k=0; k<merge->nrecv; k++){ 4115 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4116 nrows = *(buf_ri_k[k]); 4117 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4118 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4119 } 4120 4121 /* set values of ba */ 4122 m = merge->rowmap->n; 4123 for (i=0; i<m; i++) { 4124 arow = owners[rank] + i; 4125 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4126 bnzi = bi[i+1] - bi[i]; 4127 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4128 4129 /* add local non-zero vals of this proc's seqmat into ba */ 4130 anzi = ai[arow+1] - ai[arow]; 4131 aj = a->j + ai[arow]; 4132 aa = a->a + ai[arow]; 4133 nextaj = 0; 4134 for (j=0; nextaj<anzi; j++){ 4135 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4136 ba_i[j] += aa[nextaj++]; 4137 } 4138 } 4139 4140 /* 
add received vals into ba */ 4141 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4142 /* i-th row */ 4143 if (i == *nextrow[k]) { 4144 anzi = *(nextai[k]+1) - *nextai[k]; 4145 aj = buf_rj[k] + *(nextai[k]); 4146 aa = abuf_r[k] + *(nextai[k]); 4147 nextaj = 0; 4148 for (j=0; nextaj<anzi; j++){ 4149 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4150 ba_i[j] += aa[nextaj++]; 4151 } 4152 } 4153 nextrow[k]++; nextai[k]++; 4154 } 4155 } 4156 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4157 } 4158 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4159 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4160 4161 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4162 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4163 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4164 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4165 PetscFunctionReturn(0); 4166 } 4167 4168 #undef __FUNCT__ 4169 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4170 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4171 { 4172 PetscErrorCode ierr; 4173 Mat B_mpi; 4174 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4175 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4176 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4177 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4178 PetscInt len,proc,*dnz,*onz; 4179 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4180 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4181 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4182 MPI_Status *status; 4183 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4184 PetscBT lnkbt; 4185 Mat_Merge_SeqsToMPI *merge; 4186 PetscContainer container; 4187 4188 PetscFunctionBegin; 4189 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4190 4191 /* make sure it is a PETSc comm */ 4192 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4193 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4194 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4195 4196 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4197 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4198 4199 /* determine row ownership */ 4200 /*---------------------------------------------------------*/ 4201 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4202 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4203 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4204 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4205 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4206 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4207 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4208 4209 m = merge->rowmap->n; 4210 M = merge->rowmap->N; 4211 owners = merge->rowmap->range; 4212 4213 /* determine the number of messages to send, their lengths */ 4214 /*---------------------------------------------------------*/ 4215 len_s = merge->len_s; 4216 4217 len = 0; /* length of buf_si[] */ 4218 merge->nsend = 0; 4219 for (proc=0; proc<size; proc++){ 4220 len_si[proc] = 0; 4221 if (proc == rank){ 4222 len_s[proc] = 0; 4223 } else { 4224 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4225 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4226 } 4227 if (len_s[proc]) { 4228 merge->nsend++; 4229 nrows = 0; 4230 for 
(i=owners[proc]; i<owners[proc+1]; i++){ 4231 if (ai[i+1] > ai[i]) nrows++; 4232 } 4233 len_si[proc] = 2*(nrows+1); 4234 len += len_si[proc]; 4235 } 4236 } 4237 4238 /* determine the number and length of messages to receive for ij-structure */ 4239 /*-------------------------------------------------------------------------*/ 4240 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4241 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4242 4243 /* post the Irecv of j-structure */ 4244 /*-------------------------------*/ 4245 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4246 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4247 4248 /* post the Isend of j-structure */ 4249 /*--------------------------------*/ 4250 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4251 sj_waits = si_waits + merge->nsend; 4252 4253 for (proc=0, k=0; proc<size; proc++){ 4254 if (!len_s[proc]) continue; 4255 i = owners[proc]; 4256 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4257 k++; 4258 } 4259 4260 /* receives and sends of j-structure are complete */ 4261 /*------------------------------------------------*/ 4262 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4263 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4264 4265 /* send and recv i-structure */ 4266 /*---------------------------*/ 4267 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4268 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4269 4270 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4271 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4272 for (proc=0,k=0; proc<size; proc++){ 4273 if (!len_s[proc]) continue; 4274 /* form outgoing message for i-structure: 4275 buf_si[0]: nrows to be sent 4276 [1:nrows]: row index (global) 4277 [nrows+1:2*nrows+1]: i-structure index 4278 */ 4279 /*-------------------------------------------*/ 4280 nrows = len_si[proc]/2 - 1; 4281 buf_si_i = buf_si + nrows+1; 4282 buf_si[0] = nrows; 4283 buf_si_i[0] = 0; 4284 nrows = 0; 4285 for (i=owners[proc]; i<owners[proc+1]; i++){ 4286 anzi = ai[i+1] - ai[i]; 4287 if (anzi) { 4288 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4289 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4290 nrows++; 4291 } 4292 } 4293 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4294 k++; 4295 buf_si += len_si[proc]; 4296 } 4297 4298 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4299 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4300 4301 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4302 for (i=0; i<merge->nrecv; i++){ 4303 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4304 } 4305 4306 ierr = PetscFree(len_si);CHKERRQ(ierr); 4307 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4308 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4309 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4310 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4311 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4312 ierr = PetscFree(status);CHKERRQ(ierr); 4313 4314 /* compute a 
local seq matrix in each processor */ 4315 /*----------------------------------------------*/ 4316 /* allocate bi array and free space for accumulating nonzero column info */ 4317 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4318 bi[0] = 0; 4319 4320 /* create and initialize a linked list */ 4321 nlnk = N+1; 4322 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4323 4324 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4325 len = 0; 4326 len = ai[owners[rank+1]] - ai[owners[rank]]; 4327 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4328 current_space = free_space; 4329 4330 /* determine symbolic info for each local row */ 4331 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4332 nextrow = buf_ri_k + merge->nrecv; 4333 nextai = nextrow + merge->nrecv; 4334 for (k=0; k<merge->nrecv; k++){ 4335 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4336 nrows = *buf_ri_k[k]; 4337 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4338 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4339 } 4340 4341 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4342 len = 0; 4343 for (i=0;i<m;i++) { 4344 bnzi = 0; 4345 /* add local non-zero cols of this proc's seqmat into lnk */ 4346 arow = owners[rank] + i; 4347 anzi = ai[arow+1] - ai[arow]; 4348 aj = a->j + ai[arow]; 4349 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4350 bnzi += nlnk; 4351 /* add received col data into lnk */ 4352 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4353 if (i == *nextrow[k]) { /* i-th row */ 4354 anzi = *(nextai[k]+1) - *nextai[k]; 4355 aj = buf_rj[k] + *nextai[k]; 4356 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4357 bnzi += nlnk; 4358 nextrow[k]++; nextai[k]++; 4359 } 4360 } 4361 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4362 4363 /* if free space is not available, make more free space */ 4364 if (current_space->local_remaining<bnzi) { 4365 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4366 nspacedouble++; 4367 } 4368 /* copy data into free space, then initialize lnk */ 4369 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4370 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4371 4372 current_space->array += bnzi; 4373 current_space->local_used += bnzi; 4374 current_space->local_remaining -= bnzi; 4375 4376 bi[i+1] = bi[i] + bnzi; 4377 } 4378 4379 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4380 4381 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4382 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4383 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4384 4385 /* create symbolic parallel matrix B_mpi */ 4386 /*---------------------------------------*/ 4387 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4388 if (n==PETSC_DECIDE) { 4389 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4390 } else { 4391 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4392 } 4393 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4394 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4395 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4396 4397 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4398 B_mpi->assembled = PETSC_FALSE; 4399 B_mpi->ops->destroy = 
MatDestroy_MPIAIJ_SeqsToMPI; 4400 merge->bi = bi; 4401 merge->bj = bj; 4402 merge->buf_ri = buf_ri; 4403 merge->buf_rj = buf_rj; 4404 merge->coi = PETSC_NULL; 4405 merge->coj = PETSC_NULL; 4406 merge->owners_co = PETSC_NULL; 4407 4408 /* attach the supporting struct to B_mpi for reuse */ 4409 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4410 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4411 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4412 *mpimat = B_mpi; 4413 4414 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4415 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4416 PetscFunctionReturn(0); 4417 } 4418 4419 #undef __FUNCT__ 4420 #define __FUNCT__ "MatMerge_SeqsToMPI" 4421 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4422 { 4423 PetscErrorCode ierr; 4424 4425 PetscFunctionBegin; 4426 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4427 if (scall == MAT_INITIAL_MATRIX){ 4428 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4429 } 4430 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4431 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4432 PetscFunctionReturn(0); 4433 } 4434 4435 #undef __FUNCT__ 4436 #define __FUNCT__ "MatGetLocalMat" 4437 /*@ 4438 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4439 4440 Not Collective 4441 4442 Input Parameters: 4443 + A - the matrix 4444 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4445 4446 Output Parameter: 4447 . A_loc - the local sequential matrix generated 4448 4449 Level: developer 4450 4451 @*/ 4452 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4453 { 4454 PetscErrorCode ierr; 4455 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4456 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4457 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4458 MatScalar *aa=a->a,*ba=b->a,*cam; 4459 PetscScalar *ca; 4460 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4461 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4462 4463 PetscFunctionBegin; 4464 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4465 if (scall == MAT_INITIAL_MATRIX){ 4466 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4467 ci[0] = 0; 4468 for (i=0; i<am; i++){ 4469 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4470 } 4471 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4472 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4473 k = 0; 4474 for (i=0; i<am; i++) { 4475 ncols_o = bi[i+1] - bi[i]; 4476 ncols_d = ai[i+1] - ai[i]; 4477 /* off-diagonal portion of A */ 4478 for (jo=0; jo<ncols_o; jo++) { 4479 col = cmap[*bj]; 4480 if (col >= cstart) break; 4481 cj[k] = col; bj++; 4482 ca[k++] = *ba++; 4483 } 4484 /* diagonal portion of A */ 4485 for (j=0; j<ncols_d; j++) { 4486 cj[k] = cstart + *aj++; 4487 ca[k++] = *aa++; 4488 } 4489 /* off-diagonal portion of A */ 4490 for (j=jo; j<ncols_o; j++) { 4491 cj[k] = cmap[*bj++]; 4492 ca[k++] = *ba++; 4493 } 4494 } 4495 /* put together the new matrix */ 4496 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4497 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. 
*/ 4498 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4499 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4500 mat->free_a = PETSC_TRUE; 4501 mat->free_ij = PETSC_TRUE; 4502 mat->nonew = 0; 4503 } else if (scall == MAT_REUSE_MATRIX){ 4504 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4505 ci = mat->i; cj = mat->j; cam = mat->a; 4506 for (i=0; i<am; i++) { 4507 /* off-diagonal portion of A */ 4508 ncols_o = bi[i+1] - bi[i]; 4509 for (jo=0; jo<ncols_o; jo++) { 4510 col = cmap[*bj]; 4511 if (col >= cstart) break; 4512 *cam++ = *ba++; bj++; 4513 } 4514 /* diagonal portion of A */ 4515 ncols_d = ai[i+1] - ai[i]; 4516 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4517 /* off-diagonal portion of A */ 4518 for (j=jo; j<ncols_o; j++) { 4519 *cam++ = *ba++; bj++; 4520 } 4521 } 4522 } else { 4523 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4524 } 4525 4526 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4527 PetscFunctionReturn(0); 4528 } 4529 4530 #undef __FUNCT__ 4531 #define __FUNCT__ "MatGetLocalMatCondensed" 4532 /*@C 4533 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4534 4535 Not Collective 4536 4537 Input Parameters: 4538 + A - the matrix 4539 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4540 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4541 4542 Output Parameter: 4543 . A_loc - the local sequential matrix generated 4544 4545 Level: developer 4546 4547 @*/ 4548 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4549 { 4550 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4551 PetscErrorCode ierr; 4552 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4553 IS isrowa,iscola; 4554 Mat *aloc; 4555 4556 PetscFunctionBegin; 4557 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4558 if (!row){ 4559 start = A->rmap->rstart; end = A->rmap->rend; 4560 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4561 } else { 4562 isrowa = *row; 4563 } 4564 if (!col){ 4565 start = A->cmap->rstart; 4566 cmap = a->garray; 4567 nzA = a->A->cmap->n; 4568 nzB = a->B->cmap->n; 4569 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4570 ncols = 0; 4571 for (i=0; i<nzB; i++) { 4572 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4573 else break; 4574 } 4575 imark = i; 4576 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4577 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4578 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4579 ierr = PetscFree(idx);CHKERRQ(ierr); 4580 } else { 4581 iscola = *col; 4582 } 4583 if (scall != MAT_INITIAL_MATRIX){ 4584 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4585 aloc[0] = *A_loc; 4586 } 4587 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4588 *A_loc = aloc[0]; 4589 ierr = PetscFree(aloc);CHKERRQ(ierr); 4590 if (!row){ 4591 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4592 } 4593 if (!col){ 4594 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4595 } 4596 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4597 PetscFunctionReturn(0); 4598 } 4599 4600 #undef __FUNCT__ 4601 #define __FUNCT__ "MatGetBrowsOfAcols" 4602 /*@C 4603 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4604 4605 Collective on Mat 4606 4607 Input Parameters: 4608 + A,B - the matrices in mpiaij format 4609 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4610 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4611 4612 Output Parameter: 4613 + rowb, colb - index sets of rows and columns of B to extract 4614 . brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's local rows 4615 - B_seq - the sequential matrix generated 4616 4617 Level: developer 4618 4619 @*/ 4620 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4621 { 4622 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4623 PetscErrorCode ierr; 4624 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4625 IS isrowb,iscolb; 4626 Mat *bseq; 4627 4628 PetscFunctionBegin; 4629 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4630 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4631 } 4632 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4633 4634 if (scall == MAT_INITIAL_MATRIX){ 4635 start = A->cmap->rstart; 4636 cmap = a->garray; 4637 nzA = a->A->cmap->n; 4638 nzB = a->B->cmap->n; 4639 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4640 ncols = 0; 4641 for (i=0; i<nzB; i++) { /* row < local row index */ 4642 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4643 else break; 4644 } 4645 imark = i; 4646 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4647 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4648 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4649 ierr = PetscFree(idx);CHKERRQ(ierr); 4650 *brstart = imark; 4651 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4652 } else { 4653 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4654 isrowb = *rowb; iscolb = *colb; 4655 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4656 bseq[0] = *B_seq; 4657 } 4658 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4659 *B_seq = bseq[0]; 4660 ierr = PetscFree(bseq);CHKERRQ(ierr); 4661 if (!rowb){ 4662 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4663 } else { 4664 *rowb = isrowb; 4665 } 4666 if (!colb){ 4667 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4668 } else { 4669 *colb = iscolb; 4670 } 4671 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4672 PetscFunctionReturn(0); 4673 } 4674 4675 #undef __FUNCT__ 4676 #define __FUNCT__ "MatGetBrowsOfAoCols" 4677 /*@C 4678 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 4679 of the OFF-DIAGONAL portion of local A 4680 4681 Collective on Mat 4682 4683 Input Parameters: 4684 + A,B - the matrices in mpiaij format 4685 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4686 .
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4687 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4688 4689 Output Parameter: 4690 + B_oth - the sequential matrix generated 4691 4692 Level: developer 4693 4694 @*/ 4695 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4696 { 4697 VecScatter_MPI_General *gen_to,*gen_from; 4698 PetscErrorCode ierr; 4699 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4700 Mat_SeqAIJ *b_oth; 4701 VecScatter ctx=a->Mvctx; 4702 MPI_Comm comm=((PetscObject)ctx)->comm; 4703 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4704 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4705 PetscScalar *rvalues,*svalues; 4706 MatScalar *b_otha,*bufa,*bufA; 4707 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4708 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4709 MPI_Status *sstatus,rstatus; 4710 PetscMPIInt jj; 4711 PetscInt *cols,sbs,rbs; 4712 PetscScalar *vals; 4713 4714 PetscFunctionBegin; 4715 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4716 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4717 } 4718 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4719 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4720 4721 gen_to = (VecScatter_MPI_General*)ctx->todata; 4722 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4723 rvalues = gen_from->values; /* holds the length of receiving row */ 4724 svalues = gen_to->values; /* holds the length of sending row */ 4725 nrecvs = gen_from->n; 4726 nsends = gen_to->n; 4727 4728 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4729 srow = gen_to->indices; /* local row index to be sent */ 4730 sstarts = gen_to->starts; 4731 sprocs = gen_to->procs; 4732 sstatus = gen_to->sstatus; 4733 sbs = gen_to->bs; 4734 rstarts = gen_from->starts; 4735 rprocs = gen_from->procs; 4736 rbs = gen_from->bs; 4737 4738 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4739 if (scall == MAT_INITIAL_MATRIX){ 4740 /* i-array */ 4741 /*---------*/ 4742 /* post receives */ 4743 for (i=0; i<nrecvs; i++){ 4744 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4745 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4746 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4747 } 4748 4749 /* pack the outgoing message */ 4750 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4751 rstartsj = sstartsj + nsends +1; 4752 sstartsj[0] = 0; rstartsj[0] = 0; 4753 len = 0; /* total length of j or a array to be sent */ 4754 k = 0; 4755 for (i=0; i<nsends; i++){ 4756 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4757 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4758 for (j=0; j<nrows; j++) { 4759 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4760 for (l=0; l<sbs; l++){ 4761 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4762 rowlen[j*sbs+l] = ncols; 4763 len += ncols; 4764 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4765 } 4766 k++; 4767 } 4768 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4769 
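/* note: rowlen points into svalues, the value buffer of the Mvctx scatter (gen_to->values), which is being reused here as integer scratch space for the outgoing row lengths */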
sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4770 } 4771 /* recvs and sends of i-array are completed */ 4772 i = nrecvs; 4773 while (i--) { 4774 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4775 } 4776 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4777 4778 /* allocate buffers for sending j and a arrays */ 4779 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4780 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4781 4782 /* create i-array of B_oth */ 4783 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4784 b_othi[0] = 0; 4785 len = 0; /* total length of j or a array to be received */ 4786 k = 0; 4787 for (i=0; i<nrecvs; i++){ 4788 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4789 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4790 for (j=0; j<nrows; j++) { 4791 b_othi[k+1] = b_othi[k] + rowlen[j]; 4792 len += rowlen[j]; k++; 4793 } 4794 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4795 } 4796 4797 /* allocate space for j and a arrays of B_oth */ 4798 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4799 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4800 4801 /* j-array */ 4802 /*---------*/ 4803 /* post receives of j-array */ 4804 for (i=0; i<nrecvs; i++){ 4805 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4806 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4807 } 4808 4809 /* pack the outgoing message j-array */ 4810 k = 0; 4811 for (i=0; i<nsends; i++){ 4812 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4813 bufJ = bufj+sstartsj[i]; 4814 for (j=0; j<nrows; j++) { 4815 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4816 for (ll=0; ll<sbs; ll++){ 4817 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4818 for (l=0; l<ncols; l++){ 4819 *bufJ++ = cols[l]; 4820 } 4821 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4822 } 4823 } 4824 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4825 } 4826 4827 /* recvs and sends of j-array are completed */ 4828 i = nrecvs; 4829 while (i--) { 4830 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4831 } 4832 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4833 } else if (scall == MAT_REUSE_MATRIX){ 4834 sstartsj = *startsj; 4835 rstartsj = sstartsj + nsends +1; 4836 bufa = *bufa_ptr; 4837 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4838 b_otha = b_oth->a; 4839 } else { 4840 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4841 } 4842 4843 /* a-array */ 4844 /*---------*/ 4845 /* post receives of a-array */ 4846 for (i=0; i<nrecvs; i++){ 4847 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4848 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4849 } 4850 4851 /* pack the outgoing message a-array */ 4852 k = 0; 4853 for (i=0; i<nsends; i++){ 4854 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4855 bufA = bufa+sstartsj[i]; 4856 for (j=0; j<nrows; j++) { 4857 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4858 for (ll=0; ll<sbs; ll++){ 4859 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4860 for (l=0; l<ncols; l++){
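/* copy this row's numerical values into the outgoing a-array buffer */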
4861 *bufA++ = vals[l]; 4862 } 4863 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4864 } 4865 } 4866 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4867 } 4868 /* recvs and sends of a-array are completed */ 4869 i = nrecvs; 4870 while (i--) { 4871 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4872 } 4873 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4874 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4875 4876 if (scall == MAT_INITIAL_MATRIX){ 4877 /* put together the new matrix */ 4878 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4879 4880 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4881 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4882 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4883 b_oth->free_a = PETSC_TRUE; 4884 b_oth->free_ij = PETSC_TRUE; 4885 b_oth->nonew = 0; 4886 4887 ierr = PetscFree(bufj);CHKERRQ(ierr); 4888 if (!startsj || !bufa_ptr){ /* caller did not ask for the work buffers back, so free them here */ 4889 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4890 ierr = PetscFree(bufa);CHKERRQ(ierr); 4891 } else { 4892 *startsj = sstartsj; 4893 *bufa_ptr = bufa; 4894 } 4895 } 4896 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4897 PetscFunctionReturn(0); 4898 } 4899 4900 #undef __FUNCT__ 4901 #define __FUNCT__ "MatGetCommunicationStructs" 4902 /*@C 4903 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4904 4905 Not Collective 4906 4907 Input Parameters: 4908 . A - The matrix in mpiaij format 4909 4910 Output Parameter: 4911 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4912 .
colmap - A map from global column index to local index into lvec 4913 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4914 4915 Level: developer 4916 4917 @*/ 4918 #if defined (PETSC_USE_CTABLE) 4919 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4920 #else 4921 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4922 #endif 4923 { 4924 Mat_MPIAIJ *a; 4925 4926 PetscFunctionBegin; 4927 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4928 PetscValidPointer(lvec, 2) 4929 PetscValidPointer(colmap, 3) 4930 PetscValidPointer(multScatter, 4) 4931 a = (Mat_MPIAIJ *) A->data; 4932 if (lvec) *lvec = a->lvec; 4933 if (colmap) *colmap = a->colmap; 4934 if (multScatter) *multScatter = a->Mvctx; 4935 PetscFunctionReturn(0); 4936 } 4937 4938 EXTERN_C_BEGIN 4939 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4940 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4941 EXTERN_C_END 4942 4943 #undef __FUNCT__ 4944 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4945 /* 4946 Computes (B'*A')' since computing B*A directly is untenable 4947 4948 n p p 4949 ( ) ( ) ( ) 4950 m ( A ) * n ( B ) = m ( C ) 4951 ( ) ( ) ( ) 4952 4953 */ 4954 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4955 { 4956 PetscErrorCode ierr; 4957 Mat At,Bt,Ct; 4958 4959 PetscFunctionBegin; 4960 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4961 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4962 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4963 ierr = MatDestroy(At);CHKERRQ(ierr); 4964 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4965 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4966 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4967 PetscFunctionReturn(0); 4968 } 4969 4970 #undef __FUNCT__ 4971 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4972 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4973 { 4974 PetscErrorCode ierr; 4975 PetscInt m=A->rmap->n,n=B->cmap->n; 4976 Mat Cmat; 4977 4978 PetscFunctionBegin; 4979 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4980 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4981 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4982 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4983 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4984 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4985 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4986 *C = Cmat; 4987 PetscFunctionReturn(0); 4988 } 4989 4990 /* ----------------------------------------------------------------*/ 4991 #undef __FUNCT__ 4992 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4993 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4994 { 4995 PetscErrorCode ierr; 4996 4997 PetscFunctionBegin; 4998 if (scall == MAT_INITIAL_MATRIX){ 4999 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5000 } 5001 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5002 PetscFunctionReturn(0); 5003 } 5004 5005 EXTERN_C_BEGIN 5006 #if defined(PETSC_HAVE_MUMPS) 5007 extern PetscErrorCode 
MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 5008 #endif 5009 #if defined(PETSC_HAVE_PASTIX) 5010 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5011 #endif 5012 #if defined(PETSC_HAVE_SUPERLU_DIST) 5013 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5014 #endif 5015 #if defined(PETSC_HAVE_SPOOLES) 5016 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5017 #endif 5018 EXTERN_C_END 5019 5020 /*MC 5021 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5022 5023 Options Database Keys: 5024 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5025 5026 Level: beginner 5027 5028 .seealso: MatCreateMPIAIJ() 5029 M*/ 5030 5031 EXTERN_C_BEGIN 5032 #undef __FUNCT__ 5033 #define __FUNCT__ "MatCreate_MPIAIJ" 5034 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5035 { 5036 Mat_MPIAIJ *b; 5037 PetscErrorCode ierr; 5038 PetscMPIInt size; 5039 5040 PetscFunctionBegin; 5041 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5042 5043 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5044 B->data = (void*)b; 5045 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5046 B->rmap->bs = 1; 5047 B->assembled = PETSC_FALSE; 5048 B->mapping = 0; 5049 5050 B->insertmode = NOT_SET_VALUES; 5051 b->size = size; 5052 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5053 5054 /* build cache for off array entries formed */ 5055 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5056 b->donotstash = PETSC_FALSE; 5057 b->colmap = 0; 5058 b->garray = 0; 5059 b->roworiented = PETSC_TRUE; 5060 5061 /* stuff used for matrix vector multiply */ 5062 b->lvec = PETSC_NULL; 5063 b->Mvctx = PETSC_NULL; 5064 5065 /* stuff for MatGetRow() */ 5066 b->rowindices = 0; 5067 b->rowvalues = 0; 5068 b->getrowactive = PETSC_FALSE; 5069 5070 #if defined(PETSC_HAVE_SPOOLES) 5071 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5072 "MatGetFactor_mpiaij_spooles", 5073 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5074 #endif 5075 #if defined(PETSC_HAVE_MUMPS) 5076 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5077 "MatGetFactor_mpiaij_mumps", 5078 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5079 #endif 5080 #if defined(PETSC_HAVE_PASTIX) 5081 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5082 "MatGetFactor_mpiaij_pastix", 5083 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5084 #endif 5085 #if defined(PETSC_HAVE_SUPERLU_DIST) 5086 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5087 "MatGetFactor_mpiaij_superlu_dist", 5088 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5089 #endif 5090 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5091 "MatStoreValues_MPIAIJ", 5092 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5093 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5094 "MatRetrieveValues_MPIAIJ", 5095 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5096 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5097 "MatGetDiagonalBlock_MPIAIJ", 5098 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5099 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5100 "MatIsTranspose_MPIAIJ", 5101 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5102 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5103 "MatMPIAIJSetPreallocation_MPIAIJ", 5104 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5105 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5106 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5107 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5108 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5109 "MatDiagonalScaleLocal_MPIAIJ", 5110 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5111 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5112 "MatConvert_MPIAIJ_MPICSRPERM", 5113 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5114 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5115 "MatConvert_MPIAIJ_MPICRL", 5116 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5117 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5118 "MatMatMult_MPIDense_MPIAIJ", 5119 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5120 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5121 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5122 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5123 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5124 "MatMatMultNumeric_MPIDense_MPIAIJ", 5125 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5126 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5127 PetscFunctionReturn(0); 5128 } 5129 EXTERN_C_END 5130 5131 #undef __FUNCT__ 5132 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5133 /*@ 5134 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" 5135 and "off-diagonal" part of the matrix in CSR format. 5136 5137 Collective on MPI_Comm 5138 5139 Input Parameters: 5140 + comm - MPI communicator 5141 . m - number of local rows (Cannot be PETSC_DECIDE) 5142 . n - This value should be the same as the local size used in creating the 5143 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 5144 calculated if N is given). For square matrices n is almost always m. 5145 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5146 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5147 . i - row indices for "diagonal" portion of matrix 5148 . j - column indices 5149 . a - matrix values 5150 . oi - row indices for "off-diagonal" portion of matrix 5151 . oj - column indices 5152 - oa - matrix values 5153 5154 Output Parameter: 5155 . mat - the matrix 5156 5157 Level: advanced 5158 5159 Notes: 5160 The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. 5161 5162 The i, j, oi, and oj indices are 0-based 5163 5164 See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix 5165 5166 This sets local rows and cannot be used to set off-processor values. 5167 5168 You cannot later use MatSetValues() to change values in this matrix.
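   A usage sketch (the values are illustrative only and error handling is abbreviated): for a 4 by 4 matrix
   distributed over two processes with two rows and a 2 by 2 "diagonal" block per process, the j indices are
   local to the diagonal block while the oj indices are global (consistent with how the arrays are passed to
   MatCreateSeqAIJWithArrays() below), so rank 0 could pass

.vb
      PetscInt    i[]  = {0,2,4},  j[]  = {0,1,0,1},  oi[] = {0,1,2},  oj[] = {2,3};
      PetscScalar a[]  = {1.0,2.0,3.0,4.0},  oa[]  = {5.0,6.0};
      Mat         mat;

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve

   and rank 1 the analogous arrays for its own rows. Because the arrays are not copied, they must remain
   valid for the lifetime of the matrix.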
5169 5170 .keywords: matrix, aij, compressed row, sparse, parallel 5171 5172 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5173 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5174 @*/ 5175 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5176 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5177 { 5178 PetscErrorCode ierr; 5179 Mat_MPIAIJ *maij; 5180 5181 PetscFunctionBegin; 5182 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5183 if (i[0]) { 5184 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5185 } 5186 if (oi[0]) { 5187 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5188 } 5189 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5190 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5191 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5192 maij = (Mat_MPIAIJ*) (*mat)->data; 5193 maij->donotstash = PETSC_TRUE; 5194 (*mat)->preallocated = PETSC_TRUE; 5195 5196 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5197 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5198 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5199 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5200 5201 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5202 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5203 5204 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5205 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5206 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5207 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5208 5209 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5210 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5211 PetscFunctionReturn(0); 5212 } 5213 5214 /* 5215 Special version for direct calls from Fortran 5216 */ 5217 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5218 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5219 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5220 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5221 #endif 5222 5223 /* Change these macros so can be used in void function */ 5224 #undef CHKERRQ 5225 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5226 #undef SETERRQ2 5227 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5228 #undef SETERRQ 5229 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5230 5231 EXTERN_C_BEGIN 5232 #undef __FUNCT__ 5233 #define __FUNCT__ "matsetvaluesmpiaij_" 5234 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5235 { 5236 Mat mat = *mmat; 5237 PetscInt m = *mm, n = *mn; 5238 InsertMode addv = *maddv; 5239 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5240 PetscScalar value; 5241 PetscErrorCode ierr; 5242 5243 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5244 if (mat->insertmode == NOT_SET_VALUES) { 5245 mat->insertmode = addv; 5246 } 5247 #if defined(PETSC_USE_DEBUG) 5248 else if (mat->insertmode != addv) { 5249 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5250 } 5251 #endif 5252 { 5253 PetscInt i,j,rstart = mat->rmap->rstart,rend = 
mat->rmap->rend; 5254 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5255 PetscTruth roworiented = aij->roworiented; 5256 5257 /* Some Variables required in the macro */ 5258 Mat A = aij->A; 5259 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5260 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5261 MatScalar *aa = a->a; 5262 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5263 Mat B = aij->B; 5264 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5265 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5266 MatScalar *ba = b->a; 5267 5268 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5269 PetscInt nonew = a->nonew; 5270 MatScalar *ap1,*ap2; 5271 5272 PetscFunctionBegin; 5273 for (i=0; i<m; i++) { 5274 if (im[i] < 0) continue; 5275 #if defined(PETSC_USE_DEBUG) 5276 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5277 #endif 5278 if (im[i] >= rstart && im[i] < rend) { 5279 row = im[i] - rstart; 5280 lastcol1 = -1; 5281 rp1 = aj + ai[row]; 5282 ap1 = aa + ai[row]; 5283 rmax1 = aimax[row]; 5284 nrow1 = ailen[row]; 5285 low1 = 0; 5286 high1 = nrow1; 5287 lastcol2 = -1; 5288 rp2 = bj + bi[row]; 5289 ap2 = ba + bi[row]; 5290 rmax2 = bimax[row]; 5291 nrow2 = bilen[row]; 5292 low2 = 0; 5293 high2 = nrow2; 5294 5295 for (j=0; j<n; j++) { 5296 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5297 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5298 if (in[j] >= cstart && in[j] < cend){ 5299 col = in[j] - cstart; 5300 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5301 } else if (in[j] < 0) continue; 5302 #if defined(PETSC_USE_DEBUG) 5303 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5304 #endif 5305 else { 5306 if (mat->was_assembled) { 5307 if (!aij->colmap) { 5308 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5309 } 5310 #if defined (PETSC_USE_CTABLE) 5311 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5312 col--; 5313 #else 5314 col = aij->colmap[in[j]] - 1; 5315 #endif 5316 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5317 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5318 col = in[j]; 5319 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5320 B = aij->B; 5321 b = (Mat_SeqAIJ*)B->data; 5322 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5323 rp2 = bj + bi[row]; 5324 ap2 = ba + bi[row]; 5325 rmax2 = bimax[row]; 5326 nrow2 = bilen[row]; 5327 low2 = 0; 5328 high2 = nrow2; 5329 bm = aij->B->rmap->n; 5330 ba = b->a; 5331 } 5332 } else col = in[j]; 5333 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5334 } 5335 } 5336 } else { 5337 if (!aij->donotstash) { 5338 if (roworiented) { 5339 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5340 } else { 5341 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5342 } 5343 } 5344 } 5345 }} 5346 PetscFunctionReturnVoid(); 5347 } 5348 EXTERN_C_END 5349 5350
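/*
   Usage sketch for MatGetBrowsOfAoCols() (illustrative only, not compiled as part of this file): A and B are
   assumed to be already assembled MPIAIJ matrices with compatible layouts. The startsj offsets and the bufa
   send buffer produced by the MAT_INITIAL_MATRIX call can be kept by the caller and passed back with
   MAT_REUSE_MATRIX, in which case only the numerical values (the a-array) are communicated again.

      PetscInt       *startsj = PETSC_NULL;
      MatScalar      *bufa    = PETSC_NULL;
      Mat            B_oth;
      PetscErrorCode ierr;

      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ... update the numerical values of B ...
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);

      ierr = MatDestroy(B_oth);CHKERRQ(ierr);
      ierr = PetscFree(startsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
*/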