1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatDistribute_MPIAIJ" 7 /* 8 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 9 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 10 11 Only for square matrices 12 */ 13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 14 { 15 PetscMPIInt rank,size; 16 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 17 PetscErrorCode ierr; 18 Mat mat; 19 Mat_SeqAIJ *gmata; 20 PetscMPIInt tag; 21 MPI_Status status; 22 PetscTruth aij; 23 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 24 25 PetscFunctionBegin; 26 CHKMEMQ; 27 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 28 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 29 if (!rank) { 30 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 31 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 32 } 33 if (reuse == MAT_INITIAL_MATRIX) { 34 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 35 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 36 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 37 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 38 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 39 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 40 rowners[0] = 0; 41 for (i=2; i<=size; i++) { 42 rowners[i] += rowners[i-1]; 43 } 44 rstart = rowners[rank]; 45 rend = rowners[rank+1]; 46 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 47 if (!rank) { 48 gmata = (Mat_SeqAIJ*) gmat->data; 49 /* send row lengths to all processors */ 50 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 51 for (i=1; i<size; i++) { 52 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 53 } 54 /* determine number diagonal and off-diagonal counts */ 55 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 56 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 57 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 58 jj = 0; 59 for (i=0; i<m; i++) { 60 for (j=0; j<dlens[i]; j++) { 61 if (gmata->j[jj] < rstart) ld[i]++; 62 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 63 jj++; 64 } 65 } 66 /* send column indices to other processes */ 67 for (i=1; i<size; i++) { 68 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 69 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 70 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 } 72 73 /* send numerical values to other processes */ 74 for (i=1; i<size; i++) { 75 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 76 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 77 } 78 gmataa = gmata->a; 79 gmataj = gmata->j; 80 81 } else { 82 /* receive row lengths */ 83 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 84 /* receive column indices */ 85 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 86 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 87 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 88 /* determine number diagonal and off-diagonal counts */ 89 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 90 ierr = 
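/*
   The ownership ranges built near the top of this routine follow the usual
   PETSc pattern: every process contributes its local row count m with
   MPI_Allgather(), and a running sum turns the counts into offsets, so that
   process r owns global rows [rowners[r], rowners[r+1]).  A minimal
   stand-alone sketch of that computation (BuildRowOwnership is a hypothetical
   helper, not a PETSc API):

     PetscErrorCode BuildRowOwnership(MPI_Comm comm,PetscInt m,PetscInt **rowners_out)
     {
       PetscMPIInt    size;
       PetscInt       i,*rowners;
       PetscErrorCode ierr;

       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
       ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
       rowners[0] = 0;
       ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
       for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
       *rowners_out = rowners;
       return 0;
     }

   For example, local sizes 3, 2 and 4 on three processes give
   rowners = {0, 3, 5, 9}, so process 1 owns global rows 3 and 4.
*/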
PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 91 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 92 jj = 0; 93 for (i=0; i<m; i++) { 94 for (j=0; j<dlens[i]; j++) { 95 if (gmataj[jj] < rstart) ld[i]++; 96 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 97 jj++; 98 } 99 } 100 /* receive numerical values */ 101 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 103 } 104 /* set preallocation */ 105 for (i=0; i<m; i++) { 106 dlens[i] -= olens[i]; 107 } 108 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 109 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 110 111 for (i=0; i<m; i++) { 112 dlens[i] += olens[i]; 113 } 114 cnt = 0; 115 for (i=0; i<m; i++) { 116 row = rstart + i; 117 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 118 cnt += dlens[i]; 119 } 120 if (rank) { 121 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 122 } 123 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 124 ierr = PetscFree(rowners);CHKERRQ(ierr); 125 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 126 *inmat = mat; 127 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 128 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 129 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 130 mat = *inmat; 131 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 132 if (!rank) { 133 /* send numerical values to other processes */ 134 gmata = (Mat_SeqAIJ*) gmat->data; 135 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 136 gmataa = gmata->a; 137 for (i=1; i<size; i++) { 138 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 139 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 140 } 141 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 142 } else { 143 /* receive numerical values from process 0*/ 144 nz = Ad->nz + Ao->nz; 145 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 146 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 147 } 148 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 149 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 150 ad = Ad->a; 151 ao = Ao->a; 152 if (mat->rmap->n) { 153 i = 0; 154 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 155 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 156 } 157 for (i=1; i<mat->rmap->n; i++) { 158 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 159 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 160 } 161 i--; 162 if (mat->rmap->n) { 163 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 164 } 165 if (rank) { 166 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 167 } 168 } 169 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 170 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 CHKMEMQ; 172 PetscFunctionReturn(0); 173 } 174 175 /* 176 Local utility routine that creates a mapping from the global column 177 number to the local number in the off-diagonal part of the 
local 178 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 179 a slightly higher hash table cost; without it it is not scalable (each processor 180 has an order N integer array but is fast to acess. 181 */ 182 #undef __FUNCT__ 183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 185 { 186 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 187 PetscErrorCode ierr; 188 PetscInt n = aij->B->cmap->n,i; 189 190 PetscFunctionBegin; 191 #if defined (PETSC_USE_CTABLE) 192 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 193 for (i=0; i<n; i++){ 194 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 195 } 196 #else 197 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 198 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 199 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 200 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 201 #endif 202 PetscFunctionReturn(0); 203 } 204 205 206 #define CHUNKSIZE 15 207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 208 { \ 209 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 210 lastcol1 = col;\ 211 while (high1-low1 > 5) { \ 212 t = (low1+high1)/2; \ 213 if (rp1[t] > col) high1 = t; \ 214 else low1 = t; \ 215 } \ 216 for (_i=low1; _i<high1; _i++) { \ 217 if (rp1[_i] > col) break; \ 218 if (rp1[_i] == col) { \ 219 if (addv == ADD_VALUES) ap1[_i] += value; \ 220 else ap1[_i] = value; \ 221 goto a_noinsert; \ 222 } \ 223 } \ 224 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 225 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 226 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 227 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 228 N = nrow1++ - 1; a->nz++; high1++; \ 229 /* shift up all the later entries in this row */ \ 230 for (ii=N; ii>=_i; ii--) { \ 231 rp1[ii+1] = rp1[ii]; \ 232 ap1[ii+1] = ap1[ii]; \ 233 } \ 234 rp1[_i] = col; \ 235 ap1[_i] = value; \ 236 a_noinsert: ; \ 237 ailen[row] = nrow1; \ 238 } 239 240 241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 242 { \ 243 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 244 lastcol2 = col;\ 245 while (high2-low2 > 5) { \ 246 t = (low2+high2)/2; \ 247 if (rp2[t] > col) high2 = t; \ 248 else low2 = t; \ 249 } \ 250 for (_i=low2; _i<high2; _i++) { \ 251 if (rp2[_i] > col) break; \ 252 if (rp2[_i] == col) { \ 253 if (addv == ADD_VALUES) ap2[_i] += value; \ 254 else ap2[_i] = value; \ 255 goto b_noinsert; \ 256 } \ 257 } \ 258 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 259 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 260 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 261 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 262 N = nrow2++ - 1; b->nz++; high2++; \ 263 /* shift up all the later entries in this row */ \ 264 for (ii=N; ii>=_i; ii--) { \ 265 rp2[ii+1] = rp2[ii]; \ 266 ap2[ii+1] = ap2[ii]; \ 267 } \ 268 rp2[_i] = col; \ 269 ap2[_i] = value; \ 270 b_noinsert: ; \ 271 bilen[row] = nrow2; \ 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 277 { 278 
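/*
   The colmap built by CreateColmap_MPIAIJ_Private() above is the inverse of
   aij->garray: garray[] maps a local column of the off-diagonal block B to
   its global column number, while colmap answers "which local column of B
   holds global column c?".  Entries are stored shifted by +1 so that a value
   of 0 can mean "this global column does not occur in B".  A minimal lookup
   sketch assuming the plain-array (non PETSC_USE_CTABLE) build above
   (LookupLocalColumn is a hypothetical helper, not a PETSc API):

     PetscInt LookupLocalColumn(Mat_MPIAIJ *aij,PetscInt globalcol)
     {
       return aij->colmap[globalcol] - 1;
     }

   A result of -1 means the global column does not yet appear in B.  With
   PETSC_USE_CTABLE the same query is PetscTableFind(aij->colmap,globalcol+1,&lcol)
   followed by lcol--, exactly as done in MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below.
*/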
Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 279 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 280 PetscErrorCode ierr; 281 PetscInt l,*garray = mat->garray,diag; 282 283 PetscFunctionBegin; 284 /* code only works for square matrices A */ 285 286 /* find size of row to the left of the diagonal part */ 287 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 288 row = row - diag; 289 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 290 if (garray[b->j[b->i[row]+l]] > diag) break; 291 } 292 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 293 294 /* diagonal part */ 295 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 296 297 /* right of diagonal part */ 298 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 #undef __FUNCT__ 303 #define __FUNCT__ "MatSetValues_MPIAIJ" 304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 305 { 306 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 307 PetscScalar value; 308 PetscErrorCode ierr; 309 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 310 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 311 PetscTruth roworiented = aij->roworiented; 312 313 /* Some Variables required in the macro */ 314 Mat A = aij->A; 315 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 316 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 317 MatScalar *aa = a->a; 318 PetscTruth ignorezeroentries = a->ignorezeroentries; 319 Mat B = aij->B; 320 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 321 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 322 MatScalar *ba = b->a; 323 324 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 325 PetscInt nonew = a->nonew; 326 MatScalar *ap1,*ap2; 327 328 PetscFunctionBegin; 329 for (i=0; i<m; i++) { 330 if (im[i] < 0) continue; 331 #if defined(PETSC_USE_DEBUG) 332 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 333 #endif 334 if (im[i] >= rstart && im[i] < rend) { 335 row = im[i] - rstart; 336 lastcol1 = -1; 337 rp1 = aj + ai[row]; 338 ap1 = aa + ai[row]; 339 rmax1 = aimax[row]; 340 nrow1 = ailen[row]; 341 low1 = 0; 342 high1 = nrow1; 343 lastcol2 = -1; 344 rp2 = bj + bi[row]; 345 ap2 = ba + bi[row]; 346 rmax2 = bimax[row]; 347 nrow2 = bilen[row]; 348 low2 = 0; 349 high2 = nrow2; 350 351 for (j=0; j<n; j++) { 352 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 353 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 354 if (in[j] >= cstart && in[j] < cend){ 355 col = in[j] - cstart; 356 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 357 } else if (in[j] < 0) continue; 358 #if defined(PETSC_USE_DEBUG) 359 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 360 #endif 361 else { 362 if (mat->was_assembled) { 363 if (!aij->colmap) { 364 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 365 } 366 #if defined (PETSC_USE_CTABLE) 367 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 368 col--; 369 #else 370 col = aij->colmap[in[j]] - 1; 371 #endif 372 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 373 ierr = 
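/*
   Routing used throughout MatSetValues_MPIAIJ(), in outline.  This is a
   sketch only; stash_for_assembly, insert_into_A, insert_into_B and
   local_column_from_colmap are hypothetical stand-ins for the stash call and
   the two insertion macros used in the real code:

     if (row < rstart || row >= rend) {
       stash_for_assembly(row,col,value);
     } else if (col >= cstart && col < cend) {
       insert_into_A(row-rstart,col-cstart,value);
     } else {
       insert_into_B(row-rstart,local_column_from_colmap(col),value);
     }

   DisAssemble_MPIAIJ(), called just below, is the escape hatch: after an
   assembly B stores compacted local column indices, so inserting a brand-new
   off-process column first converts B back to global column indices.
*/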
DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 374 col = in[j]; 375 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 376 B = aij->B; 377 b = (Mat_SeqAIJ*)B->data; 378 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 379 rp2 = bj + bi[row]; 380 ap2 = ba + bi[row]; 381 rmax2 = bimax[row]; 382 nrow2 = bilen[row]; 383 low2 = 0; 384 high2 = nrow2; 385 bm = aij->B->rmap->n; 386 ba = b->a; 387 } 388 } else col = in[j]; 389 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 390 } 391 } 392 } else { 393 if (!aij->donotstash) { 394 if (roworiented) { 395 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 396 } else { 397 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 398 } 399 } 400 } 401 } 402 PetscFunctionReturn(0); 403 } 404 405 #undef __FUNCT__ 406 #define __FUNCT__ "MatGetValues_MPIAIJ" 407 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 408 { 409 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 410 PetscErrorCode ierr; 411 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 412 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 413 414 PetscFunctionBegin; 415 for (i=0; i<m; i++) { 416 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 417 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 418 if (idxm[i] >= rstart && idxm[i] < rend) { 419 row = idxm[i] - rstart; 420 for (j=0; j<n; j++) { 421 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 422 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 423 if (idxn[j] >= cstart && idxn[j] < cend){ 424 col = idxn[j] - cstart; 425 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 426 } else { 427 if (!aij->colmap) { 428 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 429 } 430 #if defined (PETSC_USE_CTABLE) 431 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 432 col --; 433 #else 434 col = aij->colmap[idxn[j]] - 1; 435 #endif 436 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 437 else { 438 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 439 } 440 } 441 } 442 } else { 443 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 444 } 445 } 446 PetscFunctionReturn(0); 447 } 448 449 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 453 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 454 { 455 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 456 PetscErrorCode ierr; 457 PetscInt nstash,reallocs; 458 InsertMode addv; 459 460 PetscFunctionBegin; 461 if (aij->donotstash) { 462 PetscFunctionReturn(0); 463 } 464 465 /* make sure all processors are either in INSERTMODE or ADDMODE */ 466 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 467 if (addv == (ADD_VALUES|INSERT_VALUES)) { 468 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 469 } 470 mat->insertmode = addv; /* in case this processor had no cache */ 471 472 ierr = 
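/*
   The MPI_Allreduce with MPI_BOR just above is a cheap consistency check:
   every process contributes its InsertMode and, because INSERT_VALUES and
   ADD_VALUES are distinct bit values, any mixture across the communicator
   ORs into exactly (ADD_VALUES|INSERT_VALUES), which is rejected.  The
   caller-side pattern this protects is (sketch, variable names made up):

     ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   with every process using the same ADD_VALUES or INSERT_VALUES mode between
   two assemblies.
*/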
MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 473 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 474 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 480 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 481 { 482 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 483 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 484 PetscErrorCode ierr; 485 PetscMPIInt n; 486 PetscInt i,j,rstart,ncols,flg; 487 PetscInt *row,*col; 488 PetscTruth other_disassembled; 489 PetscScalar *val; 490 InsertMode addv = mat->insertmode; 491 492 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 493 PetscFunctionBegin; 494 if (!aij->donotstash) { 495 while (1) { 496 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 497 if (!flg) break; 498 499 for (i=0; i<n;) { 500 /* Now identify the consecutive vals belonging to the same row */ 501 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 502 if (j < n) ncols = j-i; 503 else ncols = n-i; 504 /* Now assemble all these values with a single function call */ 505 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 506 i = j; 507 } 508 } 509 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 510 } 511 a->compressedrow.use = PETSC_FALSE; 512 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 513 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 514 515 /* determine if any processor has disassembled, if so we must 516 also disassemble ourselfs, in order that we may reassemble. */ 517 /* 518 if nonzero structure of submatrix B cannot change then we know that 519 no processor disassembled thus we can skip this stuff 520 */ 521 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 522 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 523 if (mat->was_assembled && !other_disassembled) { 524 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 525 } 526 } 527 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 528 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 529 } 530 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 531 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 532 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 533 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 534 535 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 536 aij->rowvalues = 0; 537 538 /* used by MatAXPY() */ 539 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 540 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 541 542 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 543 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 544 PetscFunctionReturn(0); 545 } 546 547 #undef __FUNCT__ 548 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 549 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 550 { 551 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 552 PetscErrorCode ierr; 553 554 PetscFunctionBegin; 555 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 556 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatZeroRows_MPIAIJ" 562 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar 
diag) 563 { 564 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 565 PetscErrorCode ierr; 566 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 567 PetscInt i,*owners = A->rmap->range; 568 PetscInt *nprocs,j,idx,nsends,row; 569 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 570 PetscInt *rvalues,count,base,slen,*source; 571 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 572 MPI_Comm comm = ((PetscObject)A)->comm; 573 MPI_Request *send_waits,*recv_waits; 574 MPI_Status recv_status,*send_status; 575 #if defined(PETSC_DEBUG) 576 PetscTruth found = PETSC_FALSE; 577 #endif 578 579 PetscFunctionBegin; 580 /* first count number of contributors to each processor */ 581 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 582 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 583 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 584 j = 0; 585 for (i=0; i<N; i++) { 586 if (lastidx > (idx = rows[i])) j = 0; 587 lastidx = idx; 588 for (; j<size; j++) { 589 if (idx >= owners[j] && idx < owners[j+1]) { 590 nprocs[2*j]++; 591 nprocs[2*j+1] = 1; 592 owner[i] = j; 593 #if defined(PETSC_DEBUG) 594 found = PETSC_TRUE; 595 #endif 596 break; 597 } 598 } 599 #if defined(PETSC_DEBUG) 600 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 601 found = PETSC_FALSE; 602 #endif 603 } 604 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 605 606 /* inform other processors of number of messages and max length*/ 607 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 608 609 /* post receives: */ 610 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 611 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 612 for (i=0; i<nrecvs; i++) { 613 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 614 } 615 616 /* do sends: 617 1) starts[i] gives the starting index in svalues for stuff going to 618 the ith processor 619 */ 620 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 621 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 622 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 623 starts[0] = 0; 624 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 625 for (i=0; i<N; i++) { 626 svalues[starts[owner[i]]++] = rows[i]; 627 } 628 629 starts[0] = 0; 630 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 631 count = 0; 632 for (i=0; i<size; i++) { 633 if (nprocs[2*i+1]) { 634 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 635 } 636 } 637 ierr = PetscFree(starts);CHKERRQ(ierr); 638 639 base = owners[rank]; 640 641 /* wait on receives */ 642 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 643 source = lens + nrecvs; 644 count = nrecvs; slen = 0; 645 while (count) { 646 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 647 /* unpack receives into our local space */ 648 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 649 source[imdex] = recv_status.MPI_SOURCE; 650 lens[imdex] = n; 651 slen += n; 652 count--; 653 } 654 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 655 656 /* move the data into the send scatter */ 657 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 658 count = 0; 659 for (i=0; i<nrecvs; i++) { 660 values = rvalues + i*nmax; 661 for (j=0; j<lens[i]; j++) { 
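/* Each index received here is a global row number owned by this process;
   subtracting base = owners[rank] (the first global row owned locally)
   converts it to a local row index.  For example, with ownership ranges
   owners = {0,3,5,9}, rank 1 owns rows [3,5) and global row 4 becomes
   local row 1. */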
662 lrows[count++] = values[j] - base; 663 } 664 } 665 ierr = PetscFree(rvalues);CHKERRQ(ierr); 666 ierr = PetscFree(lens);CHKERRQ(ierr); 667 ierr = PetscFree(owner);CHKERRQ(ierr); 668 ierr = PetscFree(nprocs);CHKERRQ(ierr); 669 670 /* actually zap the local rows */ 671 /* 672 Zero the required rows. If the "diagonal block" of the matrix 673 is square and the user wishes to set the diagonal we use separate 674 code so that MatSetValues() is not called for each diagonal allocating 675 new memory, thus calling lots of mallocs and slowing things down. 676 677 */ 678 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 679 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 680 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 681 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 682 } else if (diag != 0.0) { 683 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 684 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 685 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 687 } 688 for (i = 0; i < slen; i++) { 689 row = lrows[i] + rstart; 690 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 } else { 695 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 696 } 697 ierr = PetscFree(lrows);CHKERRQ(ierr); 698 699 /* wait on sends */ 700 if (nsends) { 701 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 702 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 703 ierr = PetscFree(send_status);CHKERRQ(ierr); 704 } 705 ierr = PetscFree(send_waits);CHKERRQ(ierr); 706 ierr = PetscFree(svalues);CHKERRQ(ierr); 707 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMult_MPIAIJ" 713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscInt nt; 718 719 PetscFunctionBegin; 720 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 721 if (nt != A->cmap->n) { 722 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 723 } 724 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 726 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 733 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 734 { 735 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatMultAdd_MPIAIJ" 745 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 746 { 747 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 748 PetscErrorCode ierr; 749 750 PetscFunctionBegin; 751 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 752 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 753 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 754 ierr = 
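/*
   MatMult_MPIAIJ() and MatMultAdd_MPIAIJ() both use the same split product

     y_local = A_diag * x_local + B_offdiag * x_ghost

   where x_ghost (a->lvec) holds the off-process entries of x gathered by the
   VecScatter a->Mvctx.  The scatter is started first, the diagonal-block
   product is computed to overlap communication with computation, the scatter
   is finished, and only then is the off-diagonal contribution added (the
   multadd on the next line).
*/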
(*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 760 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 761 { 762 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 763 PetscErrorCode ierr; 764 PetscTruth merged; 765 766 PetscFunctionBegin; 767 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 768 /* do nondiagonal part */ 769 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 770 if (!merged) { 771 /* send it on its way */ 772 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 /* do local part */ 774 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 775 /* receive remote parts: note this assumes the values are not actually */ 776 /* added in yy until the next line, */ 777 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 778 } else { 779 /* do local part */ 780 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 781 /* send it on its way */ 782 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 783 /* values actually were received in the Begin() but we need to call this nop */ 784 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 785 } 786 PetscFunctionReturn(0); 787 } 788 789 EXTERN_C_BEGIN 790 #undef __FUNCT__ 791 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 792 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 793 { 794 MPI_Comm comm; 795 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 796 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 797 IS Me,Notme; 798 PetscErrorCode ierr; 799 PetscInt M,N,first,last,*notme,i; 800 PetscMPIInt size; 801 802 PetscFunctionBegin; 803 804 /* Easy test: symmetric diagonal block */ 805 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 806 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 807 if (!*f) PetscFunctionReturn(0); 808 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 809 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 810 if (size == 1) PetscFunctionReturn(0); 811 812 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
*/
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)
{ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 908 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 909 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 910 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 911 ierr = PetscFree(aij);CHKERRQ(ierr); 912 913 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 914 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 915 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 916 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 917 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 918 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 919 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 920 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 921 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 922 PetscFunctionReturn(0); 923 } 924 925 #undef __FUNCT__ 926 #define __FUNCT__ "MatView_MPIAIJ_Binary" 927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 928 { 929 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 930 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 931 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 932 PetscErrorCode ierr; 933 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 934 int fd; 935 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 936 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 937 PetscScalar *column_values; 938 939 PetscFunctionBegin; 940 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 941 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 942 nz = A->nz + B->nz; 943 if (!rank) { 944 header[0] = MAT_FILE_COOKIE; 945 header[1] = mat->rmap->N; 946 header[2] = mat->cmap->N; 947 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 948 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 949 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 950 /* get largest number of rows any processor has */ 951 rlen = mat->rmap->n; 952 range = mat->rmap->range; 953 for (i=1; i<size; i++) { 954 rlen = PetscMax(rlen,range[i+1] - range[i]); 955 } 956 } else { 957 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 958 rlen = mat->rmap->n; 959 } 960 961 /* load up the local row counts */ 962 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 963 for (i=0; i<mat->rmap->n; i++) { 964 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 965 } 966 967 /* store the row lengths to the file */ 968 if (!rank) { 969 MPI_Status status; 970 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 971 for (i=1; i<size; i++) { 972 rlen = range[i+1] - range[i]; 973 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 974 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 975 } 976 } else { 977 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 978 } 979 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 980 981 /* load up 
the local column indices */
  nzmax = nz;   /* process 0 needs enough space to hold the largest number of nonzeros on any process */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr =
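/*
   Layout of the binary file written by MatView_MPIAIJ_Binary() above,
   in write order (sketch of the stream, not code):

     PetscInt    header[4]          MAT_FILE_COOKIE, M, N, total number of nonzeros
     PetscInt    rowlen[M]          row lengths, rank 0's rows first, then rank 1's, ...
     PetscInt    colidx[total_nz]   global column indices, row by row, each row ascending
     PetscScalar vals[total_nz]     numerical values in the same order

   Rank 0 performs all PetscBinaryWrite() calls; the other ranks ship their
   row lengths, indices and values to rank 0 with plain MPI_Send/MPI_Recv.
*/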
PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1064 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1065 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1066 if (iascii) { 1067 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1068 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1069 MatInfo info; 1070 PetscTruth inodes; 1071 1072 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1073 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1074 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1075 if (!inodes) { 1076 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1077 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1078 } else { 1079 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1080 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1081 } 1082 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1083 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1084 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1085 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1086 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1087 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1088 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1089 PetscFunctionReturn(0); 1090 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1091 PetscInt inodecount,inodelimit,*inodes; 1092 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1093 if (inodes) { 1094 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1095 } else { 1096 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1097 } 1098 PetscFunctionReturn(0); 1099 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1100 PetscFunctionReturn(0); 1101 } 1102 } else if (isbinary) { 1103 if (size == 1) { 1104 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1105 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1106 } else { 1107 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1108 } 1109 PetscFunctionReturn(0); 1110 } else if (isdraw) { 1111 PetscDraw draw; 1112 PetscTruth isnull; 1113 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1114 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1115 } 1116 1117 if (size == 1) { 1118 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1119 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1120 } else { 1121 /* assemble the entire matrix onto first processor. 
*/ 1122 Mat A; 1123 Mat_SeqAIJ *Aloc; 1124 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1125 MatScalar *a; 1126 1127 if (mat->rmap->N > 1024) { 1128 PetscTruth flg = PETSC_FALSE; 1129 1130 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1131 if (!flg) { 1132 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1133 } 1134 } 1135 1136 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1137 if (!rank) { 1138 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1139 } else { 1140 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1141 } 1142 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1143 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1144 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1145 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1146 1147 /* copy over the A part */ 1148 Aloc = (Mat_SeqAIJ*)aij->A->data; 1149 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1150 row = mat->rmap->rstart; 1151 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1152 for (i=0; i<m; i++) { 1153 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1154 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1155 } 1156 aj = Aloc->j; 1157 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1158 1159 /* copy over the B part */ 1160 Aloc = (Mat_SeqAIJ*)aij->B->data; 1161 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1162 row = mat->rmap->rstart; 1163 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1164 ct = cols; 1165 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1166 for (i=0; i<m; i++) { 1167 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1168 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1169 } 1170 ierr = PetscFree(ct);CHKERRQ(ierr); 1171 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1172 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1173 /* 1174 Everyone has to call to draw the matrix since the graphics waits are 1175 synchronized across all processors that share the PetscDraw object 1176 */ 1177 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1178 if (!rank) { 1179 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1180 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1181 } 1182 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1183 ierr = MatDestroy(A);CHKERRQ(ierr); 1184 } 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatView_MPIAIJ" 1190 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1191 { 1192 PetscErrorCode ierr; 1193 PetscTruth iascii,isdraw,issocket,isbinary; 1194 1195 PetscFunctionBegin; 1196 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1197 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1198 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1199 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1200 if (iascii || isdraw || isbinary || issocket) { 1201 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1202 } else 
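/*
   Typical ways this viewer dispatch is exercised (usage sketch; v and comm
   are caller-side names, not defined here):

     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerBinaryOpen(comm,"mat.dat",FILE_MODE_WRITE,&v);CHKERRQ(ierr);
     ierr = MatView(mat,v);CHKERRQ(ierr);

   The ASCII path gathers the whole matrix onto rank 0 (the code above), which
   is why matrices with more than 1024 rows are refused unless
   -mat_ascii_output_large is given; the binary path scales and is the one to
   use for large matrices.
*/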
{ 1203 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1204 } 1205 PetscFunctionReturn(0); 1206 } 1207 1208 #undef __FUNCT__ 1209 #define __FUNCT__ "MatSOR_MPIAIJ" 1210 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1211 { 1212 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1213 PetscErrorCode ierr; 1214 Vec bb1 = 0; 1215 PetscTruth hasop; 1216 1217 PetscFunctionBegin; 1218 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1219 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1220 } 1221 1222 if (flag == SOR_APPLY_UPPER) { 1223 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1228 if (flag & SOR_ZERO_INITIAL_GUESS) { 1229 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1230 its--; 1231 } 1232 1233 while (its--) { 1234 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1235 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1236 1237 /* update rhs: bb1 = bb - B*x */ 1238 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1239 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1240 1241 /* local sweep */ 1242 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1243 } 1244 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1245 if (flag & SOR_ZERO_INITIAL_GUESS) { 1246 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1247 its--; 1248 } 1249 while (its--) { 1250 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1251 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1252 1253 /* update rhs: bb1 = bb - B*x */ 1254 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1255 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1256 1257 /* local sweep */ 1258 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1259 } 1260 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1261 if (flag & SOR_ZERO_INITIAL_GUESS) { 1262 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1263 its--; 1264 } 1265 while (its--) { 1266 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1267 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1268 1269 /* update rhs: bb1 = bb - B*x */ 1270 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1271 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1272 1273 /* local sweep */ 1274 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1275 } 1276 } else if (flag & SOR_EISENSTAT) { 1277 Vec xx1; 1278 1279 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1280 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1281 1282 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1283 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1284 if (!mat->diag) { 1285 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1286 ierr = 
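/*
   All the SOR_LOCAL_* branches above implement block Jacobi with a local SOR
   sweep: each outer iteration gathers the ghost values of xx into mat->lvec,
   corrects the right-hand side for the off-process coupling,

     bb1 = bb - B * lvec      (lvec is scaled by -1, then MatMultAdd-ed onto bb)

   and applies the sequential SOR kernel of the diagonal block A to (A,bb1).
   The SOR_EISENSTAT branch being executed here differs only in how the
   diagonal contribution is applied; see the VecAYPX and MatMultAdd calls
   just below.
*/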
MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1287 } 1288 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1289 if (hasop) { 1290 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1291 } else { 1292 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1293 } 1294 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1295 1296 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1297 1298 /* local sweep */ 1299 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1300 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1301 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1302 } else { 1303 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1304 } 1305 1306 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatPermute_MPIAIJ" 1312 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1313 { 1314 MPI_Comm comm,pcomm; 1315 PetscInt first,local_size,nrows; 1316 const PetscInt *rows; 1317 PetscMPIInt size; 1318 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1319 PetscErrorCode ierr; 1320 1321 PetscFunctionBegin; 1322 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1323 /* make a collective version of 'rowp' */ 1324 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1325 if (pcomm==comm) { 1326 crowp = rowp; 1327 } else { 1328 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1329 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1330 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1331 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1332 } 1333 /* collect the global row permutation and invert it */ 1334 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1335 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1336 if (pcomm!=comm) { 1337 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1338 } 1339 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1340 /* get the local target indices */ 1341 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1342 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1343 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1344 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1345 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1346 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1347 /* the column permutation is so much easier; 1348 make a local version of 'colp' and invert it */ 1349 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1350 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1351 if (size==1) { 1352 lcolp = colp; 1353 } else { 1354 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1355 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1356 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1357 } 1358 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1359 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1360 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1361 if (size>1) { 1362 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1363 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1364 } 1365 /* now we just get the submatrix */ 1366 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1367 /* clean up */ 1368 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1369 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1370 PetscFunctionReturn(0); 1371 } 1372 1373 #undef __FUNCT__ 1374 #define __FUNCT__ 
"MatGetInfo_MPIAIJ" 1375 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1376 { 1377 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1378 Mat A = mat->A,B = mat->B; 1379 PetscErrorCode ierr; 1380 PetscReal isend[5],irecv[5]; 1381 1382 PetscFunctionBegin; 1383 info->block_size = 1.0; 1384 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1385 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1386 isend[3] = info->memory; isend[4] = info->mallocs; 1387 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1388 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1389 isend[3] += info->memory; isend[4] += info->mallocs; 1390 if (flag == MAT_LOCAL) { 1391 info->nz_used = isend[0]; 1392 info->nz_allocated = isend[1]; 1393 info->nz_unneeded = isend[2]; 1394 info->memory = isend[3]; 1395 info->mallocs = isend[4]; 1396 } else if (flag == MAT_GLOBAL_MAX) { 1397 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1398 info->nz_used = irecv[0]; 1399 info->nz_allocated = irecv[1]; 1400 info->nz_unneeded = irecv[2]; 1401 info->memory = irecv[3]; 1402 info->mallocs = irecv[4]; 1403 } else if (flag == MAT_GLOBAL_SUM) { 1404 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1405 info->nz_used = irecv[0]; 1406 info->nz_allocated = irecv[1]; 1407 info->nz_unneeded = irecv[2]; 1408 info->memory = irecv[3]; 1409 info->mallocs = irecv[4]; 1410 } 1411 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1412 info->fill_ratio_needed = 0; 1413 info->factor_mallocs = 0; 1414 1415 PetscFunctionReturn(0); 1416 } 1417 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "MatSetOption_MPIAIJ" 1420 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1421 { 1422 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1423 PetscErrorCode ierr; 1424 1425 PetscFunctionBegin; 1426 switch (op) { 1427 case MAT_NEW_NONZERO_LOCATIONS: 1428 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1429 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1430 case MAT_KEEP_NONZERO_PATTERN: 1431 case MAT_NEW_NONZERO_LOCATION_ERR: 1432 case MAT_USE_INODES: 1433 case MAT_IGNORE_ZERO_ENTRIES: 1434 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1435 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1436 break; 1437 case MAT_ROW_ORIENTED: 1438 a->roworiented = flg; 1439 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1440 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1441 break; 1442 case MAT_NEW_DIAGONALS: 1443 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1444 break; 1445 case MAT_IGNORE_OFF_PROC_ENTRIES: 1446 a->donotstash = PETSC_TRUE; 1447 break; 1448 case MAT_SYMMETRIC: 1449 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1450 break; 1451 case MAT_STRUCTURALLY_SYMMETRIC: 1452 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1453 break; 1454 case MAT_HERMITIAN: 1455 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1456 break; 1457 case MAT_SYMMETRY_ETERNAL: 1458 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1459 break; 1460 default: 1461 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1462 } 1463 PetscFunctionReturn(0); 1464 } 1465 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "MatGetRow_MPIAIJ" 1468 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1469 { 1470 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1471 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1472 PetscErrorCode ierr; 1473 PetscInt 
i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1474 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1475 PetscInt *cmap,*idx_p; 1476 1477 PetscFunctionBegin; 1478 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1479 mat->getrowactive = PETSC_TRUE; 1480 1481 if (!mat->rowvalues && (idx || v)) { 1482 /* 1483 allocate enough space to hold information from the longest row. 1484 */ 1485 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1486 PetscInt max = 1,tmp; 1487 for (i=0; i<matin->rmap->n; i++) { 1488 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1489 if (max < tmp) { max = tmp; } 1490 } 1491 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1492 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1493 } 1494 1495 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1496 lrow = row - rstart; 1497 1498 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1499 if (!v) {pvA = 0; pvB = 0;} 1500 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1501 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1502 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1503 nztot = nzA + nzB; 1504 1505 cmap = mat->garray; 1506 if (v || idx) { 1507 if (nztot) { 1508 /* Sort by increasing column numbers, assuming A and B already sorted */ 1509 PetscInt imark = -1; 1510 if (v) { 1511 *v = v_p = mat->rowvalues; 1512 for (i=0; i<nzB; i++) { 1513 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1514 else break; 1515 } 1516 imark = i; 1517 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1518 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1519 } 1520 if (idx) { 1521 *idx = idx_p = mat->rowindices; 1522 if (imark > -1) { 1523 for (i=0; i<imark; i++) { 1524 idx_p[i] = cmap[cworkB[i]]; 1525 } 1526 } else { 1527 for (i=0; i<nzB; i++) { 1528 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1529 else break; 1530 } 1531 imark = i; 1532 } 1533 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1534 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1535 } 1536 } else { 1537 if (idx) *idx = 0; 1538 if (v) *v = 0; 1539 } 1540 } 1541 *nz = nztot; 1542 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1543 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1544 PetscFunctionReturn(0); 1545 } 1546 1547 #undef __FUNCT__ 1548 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1549 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1550 { 1551 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1552 1553 PetscFunctionBegin; 1554 if (!aij->getrowactive) { 1555 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1556 } 1557 aij->getrowactive = PETSC_FALSE; 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "MatNorm_MPIAIJ" 1563 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1564 { 1565 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1566 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1567 PetscErrorCode ierr; 1568 PetscInt i,j,cstart = mat->cmap->rstart; 1569 PetscReal sum = 0.0; 1570 MatScalar *v; 1571 1572 PetscFunctionBegin; 1573 if (aij->size == 1) { 1574 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1575 } else { 1576 if (type == NORM_FROBENIUS) { 1577 v = amat->a; 1578 for (i=0; i<amat->nz; i++) { 1579 
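/*
   Editor's note: illustrative usage sketch, not part of the original source.
   MatGetRow_MPIAIJ() above merges the diagonal and off-diagonal parts of one locally
   owned row into a single list sorted by global column; only one row may be active at
   a time and it must be returned with MatRestoreRow().  A typical caller (mat is any
   assembled MATMPIAIJ matrix):

      PetscInt          row,rstart,rend,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;
      ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ... examine ncols, cols[] and vals[] here ...
        ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/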
#if defined(PETSC_USE_COMPLEX) 1580 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1581 #else 1582 sum += (*v)*(*v); v++; 1583 #endif 1584 } 1585 v = bmat->a; 1586 for (i=0; i<bmat->nz; i++) { 1587 #if defined(PETSC_USE_COMPLEX) 1588 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1589 #else 1590 sum += (*v)*(*v); v++; 1591 #endif 1592 } 1593 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1594 *norm = sqrt(*norm); 1595 } else if (type == NORM_1) { /* max column norm */ 1596 PetscReal *tmp,*tmp2; 1597 PetscInt *jj,*garray = aij->garray; 1598 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1599 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1600 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1601 *norm = 0.0; 1602 v = amat->a; jj = amat->j; 1603 for (j=0; j<amat->nz; j++) { 1604 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1605 } 1606 v = bmat->a; jj = bmat->j; 1607 for (j=0; j<bmat->nz; j++) { 1608 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1609 } 1610 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1611 for (j=0; j<mat->cmap->N; j++) { 1612 if (tmp2[j] > *norm) *norm = tmp2[j]; 1613 } 1614 ierr = PetscFree(tmp);CHKERRQ(ierr); 1615 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1616 } else if (type == NORM_INFINITY) { /* max row norm */ 1617 PetscReal ntemp = 0.0; 1618 for (j=0; j<aij->A->rmap->n; j++) { 1619 v = amat->a + amat->i[j]; 1620 sum = 0.0; 1621 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1622 sum += PetscAbsScalar(*v); v++; 1623 } 1624 v = bmat->a + bmat->i[j]; 1625 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1626 sum += PetscAbsScalar(*v); v++; 1627 } 1628 if (sum > ntemp) ntemp = sum; 1629 } 1630 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1631 } else { 1632 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1633 } 1634 } 1635 PetscFunctionReturn(0); 1636 } 1637 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "MatTranspose_MPIAIJ" 1640 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1641 { 1642 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1643 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1644 PetscErrorCode ierr; 1645 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1646 PetscInt cstart=A->cmap->rstart,ncol; 1647 Mat B; 1648 MatScalar *array; 1649 1650 PetscFunctionBegin; 1651 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1652 1653 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1654 ai = Aloc->i; aj = Aloc->j; 1655 bi = Bloc->i; bj = Bloc->j; 1656 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1657 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1658 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1659 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1660 for (i=0; i<ai[ma]; i++){ 1661 d_nnz[aj[i]] ++; 1662 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1663 } 1664 1665 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1666 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1667 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1668 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1669 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1670 } else { 
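/*
   Editor's note: illustrative usage sketch, not part of the original source.
   MatNorm_MPIAIJ() above supports NORM_FROBENIUS, NORM_1 (largest column sum) and
   NORM_INFINITY (largest row sum); NORM_2 is not available for this format.  For
   example, with mat an assembled MATMPIAIJ matrix:

      PetscReal nrm;
      ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
      ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/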
1671 B = *matout; 1672 } 1673 1674 /* copy over the A part */ 1675 array = Aloc->a; 1676 row = A->rmap->rstart; 1677 for (i=0; i<ma; i++) { 1678 ncol = ai[i+1]-ai[i]; 1679 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1680 row++; array += ncol; aj += ncol; 1681 } 1682 aj = Aloc->j; 1683 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1684 1685 /* copy over the B part */ 1686 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1687 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1688 array = Bloc->a; 1689 row = A->rmap->rstart; 1690 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1691 cols_tmp = cols; 1692 for (i=0; i<mb; i++) { 1693 ncol = bi[i+1]-bi[i]; 1694 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1695 row++; array += ncol; cols_tmp += ncol; 1696 } 1697 ierr = PetscFree(cols);CHKERRQ(ierr); 1698 1699 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1701 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1702 *matout = B; 1703 } else { 1704 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1705 } 1706 PetscFunctionReturn(0); 1707 } 1708 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1711 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1712 { 1713 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1714 Mat a = aij->A,b = aij->B; 1715 PetscErrorCode ierr; 1716 PetscInt s1,s2,s3; 1717 1718 PetscFunctionBegin; 1719 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1720 if (rr) { 1721 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1722 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1723 /* Overlap communication with computation. 
*/ 1724 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1725 } 1726 if (ll) { 1727 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1728 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1729 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1730 } 1731 /* scale the diagonal block */ 1732 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1733 1734 if (rr) { 1735 /* Do a scatter end and then right scale the off-diagonal block */ 1736 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1737 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1738 } 1739 1740 PetscFunctionReturn(0); 1741 } 1742 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1745 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1746 { 1747 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1748 PetscErrorCode ierr; 1749 1750 PetscFunctionBegin; 1751 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1752 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1753 PetscFunctionReturn(0); 1754 } 1755 #undef __FUNCT__ 1756 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1757 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1758 { 1759 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1760 PetscErrorCode ierr; 1761 1762 PetscFunctionBegin; 1763 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1764 PetscFunctionReturn(0); 1765 } 1766 1767 #undef __FUNCT__ 1768 #define __FUNCT__ "MatEqual_MPIAIJ" 1769 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1770 { 1771 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1772 Mat a,b,c,d; 1773 PetscTruth flg; 1774 PetscErrorCode ierr; 1775 1776 PetscFunctionBegin; 1777 a = matA->A; b = matA->B; 1778 c = matB->A; d = matB->B; 1779 1780 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1781 if (flg) { 1782 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1783 } 1784 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1785 PetscFunctionReturn(0); 1786 } 1787 1788 #undef __FUNCT__ 1789 #define __FUNCT__ "MatCopy_MPIAIJ" 1790 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1791 { 1792 PetscErrorCode ierr; 1793 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1794 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1795 1796 PetscFunctionBegin; 1797 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1798 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1799 /* because of the column compression in the off-processor part of the matrix a->B, 1800 the number of columns in a->B and b->B may be different, hence we cannot call 1801 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1802 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1803 then copying the submatrices */ 1804 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1805 } else { 1806 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1807 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1808 } 1809 PetscFunctionReturn(0); 1810 } 1811 1812 #undef __FUNCT__ 1813 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1814 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1815 { 1816 PetscErrorCode ierr; 1817 1818 PetscFunctionBegin; 1819 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1820 PetscFunctionReturn(0); 1821 } 1822 1823 #include "petscblaslapack.h" 1824 #undef __FUNCT__ 1825 #define __FUNCT__ "MatAXPY_MPIAIJ" 1826 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1827 { 1828 PetscErrorCode ierr; 1829 PetscInt i; 1830 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1831 PetscBLASInt bnz,one=1; 1832 Mat_SeqAIJ *x,*y; 1833 1834 PetscFunctionBegin; 1835 if (str == SAME_NONZERO_PATTERN) { 1836 PetscScalar alpha = a; 1837 x = (Mat_SeqAIJ *)xx->A->data; 1838 y = (Mat_SeqAIJ *)yy->A->data; 1839 bnz = PetscBLASIntCast(x->nz); 1840 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1841 x = (Mat_SeqAIJ *)xx->B->data; 1842 y = (Mat_SeqAIJ *)yy->B->data; 1843 bnz = PetscBLASIntCast(x->nz); 1844 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1845 } else if (str == SUBSET_NONZERO_PATTERN) { 1846 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1847 1848 x = (Mat_SeqAIJ *)xx->B->data; 1849 y = (Mat_SeqAIJ *)yy->B->data; 1850 if (y->xtoy && y->XtoY != xx->B) { 1851 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1852 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1853 } 1854 if (!y->xtoy) { /* get xtoy */ 1855 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1856 y->XtoY = xx->B; 1857 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1858 } 1859 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1860 } else { 1861 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1862 } 1863 PetscFunctionReturn(0); 1864 } 1865 1866 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1867 1868 #undef __FUNCT__ 1869 #define __FUNCT__ "MatConjugate_MPIAIJ" 1870 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1871 { 1872 #if defined(PETSC_USE_COMPLEX) 1873 PetscErrorCode ierr; 1874 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1875 1876 PetscFunctionBegin; 1877 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1878 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1879 #else 1880 PetscFunctionBegin; 1881 #endif 1882 PetscFunctionReturn(0); 1883 } 1884 1885 #undef __FUNCT__ 1886 #define __FUNCT__ "MatRealPart_MPIAIJ" 1887 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1888 { 1889 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1890 PetscErrorCode ierr; 1891 1892 PetscFunctionBegin; 1893 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1894 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1895 PetscFunctionReturn(0); 1896 } 1897 1898 #undef __FUNCT__ 1899 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1900 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1901 { 1902 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1903 PetscErrorCode ierr; 1904 1905 PetscFunctionBegin; 1906 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1907 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1908 PetscFunctionReturn(0); 1909 } 1910 1911 #ifdef PETSC_HAVE_PBGL 1912 1913 #include 
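/*
   Editor's note: illustrative usage sketch, not part of the original source.
   MatAXPY_MPIAIJ() above computes Y = a*X + Y: with SAME_NONZERO_PATTERN it reduces to
   two BLAS axpy calls on the stored values, with SUBSET_NONZERO_PATTERN it builds (and
   caches) an X-to-Y index map for the off-diagonal block, and otherwise it falls back
   to MatAXPY_Basic().  Typical call, with X and Y MATMPIAIJ matrices of identical
   layout:

      ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/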
<boost/parallel/mpi/bsp_process_group.hpp> 1914 #include <boost/graph/distributed/ilu_default_graph.hpp> 1915 #include <boost/graph/distributed/ilu_0_block.hpp> 1916 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1917 #include <boost/graph/distributed/petsc/interface.hpp> 1918 #include <boost/multi_array.hpp> 1919 #include <boost/parallel/distributed_property_map->hpp> 1920 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1923 /* 1924 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1925 */ 1926 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1927 { 1928 namespace petsc = boost::distributed::petsc; 1929 1930 namespace graph_dist = boost::graph::distributed; 1931 using boost::graph::distributed::ilu_default::process_group_type; 1932 using boost::graph::ilu_permuted; 1933 1934 PetscTruth row_identity, col_identity; 1935 PetscContainer c; 1936 PetscInt m, n, M, N; 1937 PetscErrorCode ierr; 1938 1939 PetscFunctionBegin; 1940 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1941 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1942 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1943 if (!row_identity || !col_identity) { 1944 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1945 } 1946 1947 process_group_type pg; 1948 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1949 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1950 lgraph_type& level_graph = *lgraph_p; 1951 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1952 1953 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1954 ilu_permuted(level_graph); 1955 1956 /* put together the new matrix */ 1957 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1958 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1959 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1960 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1961 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1962 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1963 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1964 1965 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1966 ierr = PetscContainerSetPointer(c, lgraph_p); 1967 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1968 PetscFunctionReturn(0); 1969 } 1970 1971 #undef __FUNCT__ 1972 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1973 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1974 { 1975 PetscFunctionBegin; 1976 PetscFunctionReturn(0); 1977 } 1978 1979 #undef __FUNCT__ 1980 #define __FUNCT__ "MatSolve_MPIAIJ" 1981 /* 1982 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1983 */ 1984 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1985 { 1986 namespace graph_dist = boost::graph::distributed; 1987 1988 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1989 lgraph_type* lgraph_p; 1990 PetscContainer c; 1991 PetscErrorCode ierr; 1992 1993 PetscFunctionBegin; 1994 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1995 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1996 ierr = VecCopy(b, x);CHKERRQ(ierr); 1997 1998 PetscScalar* array_x; 1999 ierr = VecGetArray(x, 
&array_x);CHKERRQ(ierr); 2000 PetscInt sx; 2001 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2002 2003 PetscScalar* array_b; 2004 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2005 PetscInt sb; 2006 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2007 2008 lgraph_type& level_graph = *lgraph_p; 2009 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2010 2011 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2012 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2013 ref_x(array_x, boost::extents[num_vertices(graph)]); 2014 2015 typedef boost::iterator_property_map<array_ref_type::iterator, 2016 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2017 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2018 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2019 2020 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2021 2022 PetscFunctionReturn(0); 2023 } 2024 #endif 2025 2026 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2027 PetscInt nzlocal,nsends,nrecvs; 2028 PetscMPIInt *send_rank; 2029 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 2030 PetscScalar *sbuf_a,**rbuf_a; 2031 PetscErrorCode (*MatDestroy)(Mat); 2032 } Mat_Redundant; 2033 2034 #undef __FUNCT__ 2035 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2036 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2037 { 2038 PetscErrorCode ierr; 2039 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2040 PetscInt i; 2041 2042 PetscFunctionBegin; 2043 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 2044 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2045 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2046 for (i=0; i<redund->nrecvs; i++){ 2047 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2048 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2049 } 2050 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2051 ierr = PetscFree(redund);CHKERRQ(ierr); 2052 PetscFunctionReturn(0); 2053 } 2054 2055 #undef __FUNCT__ 2056 #define __FUNCT__ "MatDestroy_MatRedundant" 2057 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2058 { 2059 PetscErrorCode ierr; 2060 PetscContainer container; 2061 Mat_Redundant *redund=PETSC_NULL; 2062 2063 PetscFunctionBegin; 2064 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2065 if (container) { 2066 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2067 } else { 2068 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2069 } 2070 A->ops->destroy = redund->MatDestroy; 2071 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2072 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2073 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2074 PetscFunctionReturn(0); 2075 } 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2079 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2080 { 2081 PetscMPIInt rank,size; 2082 MPI_Comm comm=((PetscObject)mat)->comm; 2083 PetscErrorCode ierr; 2084 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2085 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2086 PetscInt *rowrange=mat->rmap->range; 2087 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2088 Mat A=aij->A,B=aij->B,C=*matredundant; 2089 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2090 PetscScalar *sbuf_a; 2091 PetscInt nzlocal=a->nz+b->nz; 
2092 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2093 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2094 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2095 MatScalar *aworkA,*aworkB; 2096 PetscScalar *vals; 2097 PetscMPIInt tag1,tag2,tag3,imdex; 2098 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2099 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2100 MPI_Status recv_status,*send_status; 2101 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2102 PetscInt **rbuf_j=PETSC_NULL; 2103 PetscScalar **rbuf_a=PETSC_NULL; 2104 Mat_Redundant *redund=PETSC_NULL; 2105 PetscContainer container; 2106 2107 PetscFunctionBegin; 2108 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2109 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2110 2111 if (reuse == MAT_REUSE_MATRIX) { 2112 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2113 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2114 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2115 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2116 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2117 if (container) { 2118 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2119 } else { 2120 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2121 } 2122 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2123 2124 nsends = redund->nsends; 2125 nrecvs = redund->nrecvs; 2126 send_rank = redund->send_rank; recv_rank = send_rank + size; 2127 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2128 sbuf_j = redund->sbuf_j; 2129 sbuf_a = redund->sbuf_a; 2130 rbuf_j = redund->rbuf_j; 2131 rbuf_a = redund->rbuf_a; 2132 } 2133 2134 if (reuse == MAT_INITIAL_MATRIX){ 2135 PetscMPIInt subrank,subsize; 2136 PetscInt nleftover,np_subcomm; 2137 /* get the destination processors' id send_rank, nsends and nrecvs */ 2138 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2139 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2140 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2141 recv_rank = send_rank + size; 2142 np_subcomm = size/nsubcomm; 2143 nleftover = size - nsubcomm*np_subcomm; 2144 nsends = 0; nrecvs = 0; 2145 for (i=0; i<size; i++){ /* i=rank*/ 2146 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2147 send_rank[nsends] = i; nsends++; 2148 recv_rank[nrecvs++] = i; 2149 } 2150 } 2151 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2152 i = size-nleftover-1; 2153 j = 0; 2154 while (j < nsubcomm - nleftover){ 2155 send_rank[nsends++] = i; 2156 i--; j++; 2157 } 2158 } 2159 2160 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2161 for (i=0; i<nleftover; i++){ 2162 recv_rank[nrecvs++] = size-nleftover+i; 2163 } 2164 } 2165 2166 /* allocate sbuf_j, sbuf_a */ 2167 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2168 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2169 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2170 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2171 2172 /* copy mat's local entries into the buffers */ 2173 if (reuse == MAT_INITIAL_MATRIX){ 2174 rownz_max = 0; 2175 rptr = sbuf_j; 2176 cols = sbuf_j + rend-rstart + 1; 2177 vals = sbuf_a; 
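/*
   Editor's note: illustrative sketch of the container idiom used here, not part of the
   original source.  The Mat_Redundant context above is attached to the redundant matrix
   through a PetscContainer so it can be recovered on reuse and freed automatically.
   For a hypothetical context pointer ctx attached to a matrix C the idiom is:

      PetscContainer container;
      ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
      ierr = PetscContainerSetPointer(container,ctx);CHKERRQ(ierr);
      ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
      ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);

   and later the pointer is recovered with

      ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
      ierr = PetscContainerGetPointer(container,(void**)&ctx);CHKERRQ(ierr);
*/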
2178 rptr[0] = 0; 2179 for (i=0; i<rend-rstart; i++){ 2180 row = i + rstart; 2181 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2182 ncols = nzA + nzB; 2183 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2184 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2185 /* load the column indices for this row into cols */ 2186 lwrite = 0; 2187 for (l=0; l<nzB; l++) { 2188 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2189 vals[lwrite] = aworkB[l]; 2190 cols[lwrite++] = ctmp; 2191 } 2192 } 2193 for (l=0; l<nzA; l++){ 2194 vals[lwrite] = aworkA[l]; 2195 cols[lwrite++] = cstart + cworkA[l]; 2196 } 2197 for (l=0; l<nzB; l++) { 2198 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2199 vals[lwrite] = aworkB[l]; 2200 cols[lwrite++] = ctmp; 2201 } 2202 } 2203 vals += ncols; 2204 cols += ncols; 2205 rptr[i+1] = rptr[i] + ncols; 2206 if (rownz_max < ncols) rownz_max = ncols; 2207 } 2208 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2209 } else { /* only copy matrix values into sbuf_a */ 2210 rptr = sbuf_j; 2211 vals = sbuf_a; 2212 rptr[0] = 0; 2213 for (i=0; i<rend-rstart; i++){ 2214 row = i + rstart; 2215 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2216 ncols = nzA + nzB; 2217 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2218 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2219 lwrite = 0; 2220 for (l=0; l<nzB; l++) { 2221 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2222 } 2223 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2224 for (l=0; l<nzB; l++) { 2225 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2226 } 2227 vals += ncols; 2228 rptr[i+1] = rptr[i] + ncols; 2229 } 2230 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2231 2232 /* send nzlocal to others, and recv other's nzlocal */ 2233 /*--------------------------------------------------*/ 2234 if (reuse == MAT_INITIAL_MATRIX){ 2235 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2236 s_waits2 = s_waits3 + nsends; 2237 s_waits1 = s_waits2 + nsends; 2238 r_waits1 = s_waits1 + nsends; 2239 r_waits2 = r_waits1 + nrecvs; 2240 r_waits3 = r_waits2 + nrecvs; 2241 } else { 2242 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2243 r_waits3 = s_waits3 + nsends; 2244 } 2245 2246 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2247 if (reuse == MAT_INITIAL_MATRIX){ 2248 /* get new tags to keep the communication clean */ 2249 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2250 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2251 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2252 rbuf_nz = sbuf_nz + nsends; 2253 2254 /* post receives of other's nzlocal */ 2255 for (i=0; i<nrecvs; i++){ 2256 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2257 } 2258 /* send nzlocal to others */ 2259 for (i=0; i<nsends; i++){ 2260 sbuf_nz[i] = nzlocal; 2261 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2262 } 2263 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2264 count = nrecvs; 2265 while (count) { 2266 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2267 recv_rank[imdex] = recv_status.MPI_SOURCE; 2268 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 
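/*
   Editor's note: illustrative sketch, not part of the original source.  The exchange in
   this routine follows the standard nonblocking handshake: every receiver posts
   MPI_Irecv() with MPI_ANY_SOURCE, every sender posts MPI_Isend(), and the receive loop
   drains the requests with MPI_Waitany(), using status.MPI_SOURCE to learn which rank a
   message came from.  Stripped to its essentials (rbuf, sbuf, rreq, sreq, dest and
   count are stand-ins for the buffers allocated earlier in this routine):

      for (i=0; i<nrecvs; i++) {ierr = MPI_Irecv(rbuf+i,1,MPIU_INT,MPI_ANY_SOURCE,tag,comm,rreq+i);CHKERRQ(ierr);}
      for (i=0; i<nsends; i++) {ierr = MPI_Isend(sbuf+i,1,MPIU_INT,dest[i],tag,comm,sreq+i);CHKERRQ(ierr);}
      count = nrecvs;
      while (count--) {
        ierr = MPI_Waitany(nrecvs,rreq,&idx,&status);CHKERRQ(ierr);
        ... status.MPI_SOURCE identifies the sender of rbuf[idx] ...
      }
*/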
2269 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2270 2271 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2272 rbuf_nz[imdex] += i + 2; 2273 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2274 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2275 count--; 2276 } 2277 /* wait on sends of nzlocal */ 2278 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2279 /* send mat->i,j to others, and recv from other's */ 2280 /*------------------------------------------------*/ 2281 for (i=0; i<nsends; i++){ 2282 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2283 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2284 } 2285 /* wait on receives of mat->i,j */ 2286 /*------------------------------*/ 2287 count = nrecvs; 2288 while (count) { 2289 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2290 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2291 count--; 2292 } 2293 /* wait on sends of mat->i,j */ 2294 /*---------------------------*/ 2295 if (nsends) { 2296 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2297 } 2298 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2299 2300 /* post receives, send and receive mat->a */ 2301 /*----------------------------------------*/ 2302 for (imdex=0; imdex<nrecvs; imdex++) { 2303 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2304 } 2305 for (i=0; i<nsends; i++){ 2306 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2307 } 2308 count = nrecvs; 2309 while (count) { 2310 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2311 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2312 count--; 2313 } 2314 if (nsends) { 2315 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2316 } 2317 2318 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2319 2320 /* create redundant matrix */ 2321 /*-------------------------*/ 2322 if (reuse == MAT_INITIAL_MATRIX){ 2323 /* compute rownz_max for preallocation */ 2324 for (imdex=0; imdex<nrecvs; imdex++){ 2325 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2326 rptr = rbuf_j[imdex]; 2327 for (i=0; i<j; i++){ 2328 ncols = rptr[i+1] - rptr[i]; 2329 if (rownz_max < ncols) rownz_max = ncols; 2330 } 2331 } 2332 2333 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2334 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2335 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2336 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2337 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2338 } else { 2339 C = *matredundant; 2340 } 2341 2342 /* insert local matrix entries */ 2343 rptr = sbuf_j; 2344 cols = sbuf_j + rend-rstart + 1; 2345 vals = sbuf_a; 2346 for (i=0; i<rend-rstart; i++){ 2347 row = i + rstart; 2348 ncols = rptr[i+1] - rptr[i]; 2349 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2350 vals += ncols; 2351 cols += ncols; 2352 } 2353 /* insert received matrix entries */ 2354 for 
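/*
   Editor's note: illustrative usage sketch, not part of the original source.  At the
   user level this routine is reached through MatGetRedundantMatrix(), which builds a
   copy of the whole matrix on each sub-communicator; the MAT_REUSE_MATRIX path only
   moves the numerical values.  Assuming subcomm was created beforehand (for example
   with MPI_Comm_split()) and the interface matches the arguments of the implementation
   above:

      Mat red = PETSC_NULL;
      ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&red);CHKERRQ(ierr);
      ... change values in mat and reassemble ...
      ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&red);CHKERRQ(ierr);
      ierr = MatDestroy(red);CHKERRQ(ierr);
*/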
(imdex=0; imdex<nrecvs; imdex++){ 2355 rstart = rowrange[recv_rank[imdex]]; 2356 rend = rowrange[recv_rank[imdex]+1]; 2357 rptr = rbuf_j[imdex]; 2358 cols = rbuf_j[imdex] + rend-rstart + 1; 2359 vals = rbuf_a[imdex]; 2360 for (i=0; i<rend-rstart; i++){ 2361 row = i + rstart; 2362 ncols = rptr[i+1] - rptr[i]; 2363 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2364 vals += ncols; 2365 cols += ncols; 2366 } 2367 } 2368 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2369 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2370 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2371 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2372 if (reuse == MAT_INITIAL_MATRIX){ 2373 PetscContainer container; 2374 *matredundant = C; 2375 /* create a supporting struct and attach it to C for reuse */ 2376 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2377 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2378 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2379 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2380 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2381 2382 redund->nzlocal = nzlocal; 2383 redund->nsends = nsends; 2384 redund->nrecvs = nrecvs; 2385 redund->send_rank = send_rank; 2386 redund->sbuf_nz = sbuf_nz; 2387 redund->sbuf_j = sbuf_j; 2388 redund->sbuf_a = sbuf_a; 2389 redund->rbuf_j = rbuf_j; 2390 redund->rbuf_a = rbuf_a; 2391 2392 redund->MatDestroy = C->ops->destroy; 2393 C->ops->destroy = MatDestroy_MatRedundant; 2394 } 2395 PetscFunctionReturn(0); 2396 } 2397 2398 #undef __FUNCT__ 2399 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2400 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2401 { 2402 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2403 PetscErrorCode ierr; 2404 PetscInt i,*idxb = 0; 2405 PetscScalar *va,*vb; 2406 Vec vtmp; 2407 2408 PetscFunctionBegin; 2409 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2410 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2411 if (idx) { 2412 for (i=0; i<A->rmap->n; i++) { 2413 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2414 } 2415 } 2416 2417 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2418 if (idx) { 2419 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2420 } 2421 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2422 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2423 2424 for (i=0; i<A->rmap->n; i++){ 2425 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2426 va[i] = vb[i]; 2427 if (idx) idx[i] = a->garray[idxb[i]]; 2428 } 2429 } 2430 2431 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2432 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2433 if (idxb) { 2434 ierr = PetscFree(idxb);CHKERRQ(ierr); 2435 } 2436 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2437 PetscFunctionReturn(0); 2438 } 2439 2440 #undef __FUNCT__ 2441 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2442 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2443 { 2444 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2445 PetscErrorCode ierr; 2446 PetscInt i,*idxb = 0; 2447 PetscScalar *va,*vb; 2448 Vec vtmp; 2449 2450 PetscFunctionBegin; 2451 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2452 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2453 if (idx) { 2454 for (i=0; i<A->cmap->n; i++) { 2455 if (PetscAbsScalar(va[i])) idx[i] += 
A->cmap->rstart; 2456 } 2457 } 2458 2459 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2460 if (idx) { 2461 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2462 } 2463 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2464 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2465 2466 for (i=0; i<A->rmap->n; i++){ 2467 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2468 va[i] = vb[i]; 2469 if (idx) idx[i] = a->garray[idxb[i]]; 2470 } 2471 } 2472 2473 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2474 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2475 if (idxb) { 2476 ierr = PetscFree(idxb);CHKERRQ(ierr); 2477 } 2478 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2479 PetscFunctionReturn(0); 2480 } 2481 2482 #undef __FUNCT__ 2483 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2484 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2485 { 2486 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2487 PetscInt n = A->rmap->n; 2488 PetscInt cstart = A->cmap->rstart; 2489 PetscInt *cmap = mat->garray; 2490 PetscInt *diagIdx, *offdiagIdx; 2491 Vec diagV, offdiagV; 2492 PetscScalar *a, *diagA, *offdiagA; 2493 PetscInt r; 2494 PetscErrorCode ierr; 2495 2496 PetscFunctionBegin; 2497 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2498 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2499 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2500 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2501 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2502 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2503 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2504 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2505 for(r = 0; r < n; ++r) { 2506 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2507 a[r] = diagA[r]; 2508 idx[r] = cstart + diagIdx[r]; 2509 } else { 2510 a[r] = offdiagA[r]; 2511 idx[r] = cmap[offdiagIdx[r]]; 2512 } 2513 } 2514 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2515 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2516 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2517 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2518 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2519 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 #undef __FUNCT__ 2524 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2525 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2526 { 2527 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2528 PetscInt n = A->rmap->n; 2529 PetscInt cstart = A->cmap->rstart; 2530 PetscInt *cmap = mat->garray; 2531 PetscInt *diagIdx, *offdiagIdx; 2532 Vec diagV, offdiagV; 2533 PetscScalar *a, *diagA, *offdiagA; 2534 PetscInt r; 2535 PetscErrorCode ierr; 2536 2537 PetscFunctionBegin; 2538 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2539 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2540 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2541 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2542 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2543 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2544 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2545 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2546 for(r = 0; r < n; ++r) { 2547 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2548 a[r] = diagA[r]; 2549 idx[r] = cstart + diagIdx[r]; 2550 } else { 2551 a[r] = 
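/*
   Editor's note: illustrative usage sketch, not part of the original source.  The row
   max/min routines in this part of the file fill a parallel vector with one extremal
   entry per locally owned row and optionally report the global column where it occurs
   (passing PETSC_NULL for the index array is allowed).  For an assembled MATMPIAIJ
   matrix A:

      Vec      rmax;
      PetscInt m,*loc;
      ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
      ierr = VecCreateMPI(((PetscObject)A)->comm,m,PETSC_DETERMINE,&rmax);CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&loc);CHKERRQ(ierr);
      ierr = MatGetRowMaxAbs(A,rmax,loc);CHKERRQ(ierr);
      ierr = PetscFree(loc);CHKERRQ(ierr);
      ierr = VecDestroy(rmax);CHKERRQ(ierr);
*/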
offdiagA[r]; 2552 idx[r] = cmap[offdiagIdx[r]]; 2553 } 2554 } 2555 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2556 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2557 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2558 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2559 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2560 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2561 PetscFunctionReturn(0); 2562 } 2563 2564 #undef __FUNCT__ 2565 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2566 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2567 { 2568 PetscErrorCode ierr; 2569 Mat *dummy; 2570 2571 PetscFunctionBegin; 2572 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2573 *newmat = *dummy; 2574 ierr = PetscFree(dummy);CHKERRQ(ierr); 2575 PetscFunctionReturn(0); 2576 } 2577 2578 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2579 /* -------------------------------------------------------------------*/ 2580 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2581 MatGetRow_MPIAIJ, 2582 MatRestoreRow_MPIAIJ, 2583 MatMult_MPIAIJ, 2584 /* 4*/ MatMultAdd_MPIAIJ, 2585 MatMultTranspose_MPIAIJ, 2586 MatMultTransposeAdd_MPIAIJ, 2587 #ifdef PETSC_HAVE_PBGL 2588 MatSolve_MPIAIJ, 2589 #else 2590 0, 2591 #endif 2592 0, 2593 0, 2594 /*10*/ 0, 2595 0, 2596 0, 2597 MatSOR_MPIAIJ, 2598 MatTranspose_MPIAIJ, 2599 /*15*/ MatGetInfo_MPIAIJ, 2600 MatEqual_MPIAIJ, 2601 MatGetDiagonal_MPIAIJ, 2602 MatDiagonalScale_MPIAIJ, 2603 MatNorm_MPIAIJ, 2604 /*20*/ MatAssemblyBegin_MPIAIJ, 2605 MatAssemblyEnd_MPIAIJ, 2606 MatSetOption_MPIAIJ, 2607 MatZeroEntries_MPIAIJ, 2608 /*24*/ MatZeroRows_MPIAIJ, 2609 0, 2610 #ifdef PETSC_HAVE_PBGL 2611 0, 2612 #else 2613 0, 2614 #endif 2615 0, 2616 0, 2617 /*29*/ MatSetUpPreallocation_MPIAIJ, 2618 #ifdef PETSC_HAVE_PBGL 2619 0, 2620 #else 2621 0, 2622 #endif 2623 0, 2624 0, 2625 0, 2626 /*34*/ MatDuplicate_MPIAIJ, 2627 0, 2628 0, 2629 0, 2630 0, 2631 /*39*/ MatAXPY_MPIAIJ, 2632 MatGetSubMatrices_MPIAIJ, 2633 MatIncreaseOverlap_MPIAIJ, 2634 MatGetValues_MPIAIJ, 2635 MatCopy_MPIAIJ, 2636 /*44*/ MatGetRowMax_MPIAIJ, 2637 MatScale_MPIAIJ, 2638 0, 2639 0, 2640 0, 2641 /*49*/ MatSetBlockSize_MPIAIJ, 2642 0, 2643 0, 2644 0, 2645 0, 2646 /*54*/ MatFDColoringCreate_MPIAIJ, 2647 0, 2648 MatSetUnfactored_MPIAIJ, 2649 MatPermute_MPIAIJ, 2650 0, 2651 /*59*/ MatGetSubMatrix_MPIAIJ, 2652 MatDestroy_MPIAIJ, 2653 MatView_MPIAIJ, 2654 0, 2655 0, 2656 /*64*/ 0, 2657 0, 2658 0, 2659 0, 2660 0, 2661 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2662 MatGetRowMinAbs_MPIAIJ, 2663 0, 2664 MatSetColoring_MPIAIJ, 2665 #if defined(PETSC_HAVE_ADIC) 2666 MatSetValuesAdic_MPIAIJ, 2667 #else 2668 0, 2669 #endif 2670 MatSetValuesAdifor_MPIAIJ, 2671 /*75*/ MatFDColoringApply_AIJ, 2672 0, 2673 0, 2674 0, 2675 0, 2676 /*80*/ 0, 2677 0, 2678 0, 2679 /*83*/ MatLoad_MPIAIJ, 2680 0, 2681 0, 2682 0, 2683 0, 2684 0, 2685 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2686 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2687 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2688 MatPtAP_Basic, 2689 MatPtAPSymbolic_MPIAIJ, 2690 /*94*/ MatPtAPNumeric_MPIAIJ, 2691 0, 2692 0, 2693 0, 2694 0, 2695 /*99*/ 0, 2696 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2697 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2698 MatConjugate_MPIAIJ, 2699 0, 2700 /*104*/MatSetValuesRow_MPIAIJ, 2701 MatRealPart_MPIAIJ, 2702 MatImaginaryPart_MPIAIJ, 2703 0, 2704 0, 2705 /*109*/0, 2706 MatGetRedundantMatrix_MPIAIJ, 2707 MatGetRowMin_MPIAIJ, 2708 0, 2709 0, 2710 
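/*
   Editor's note: explanatory sketch, not part of the original source.  The table being
   initialized here is the virtual function table for MATMPIAIJ: slot k corresponds to
   the MatOperation enum value k and the generic Mat routines dispatch through it, for
   example

      ierr = (*mat->ops->mult)(mat,x,y);CHKERRQ(ierr);

   A zero slot means the operation is not implemented for this type, which callers can
   detect with

      PetscTruth hasop;
      ierr = MatHasOperation(mat,MATOP_GET_ROW_MAX,&hasop);CHKERRQ(ierr);
*/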
/*114*/MatGetSeqNonzerostructure_MPIAIJ, 2711 0, 2712 0, 2713 0, 2714 0, 2715 0 2716 }; 2717 2718 /* ----------------------------------------------------------------------------------------*/ 2719 2720 EXTERN_C_BEGIN 2721 #undef __FUNCT__ 2722 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2723 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2724 { 2725 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2726 PetscErrorCode ierr; 2727 2728 PetscFunctionBegin; 2729 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2730 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2731 PetscFunctionReturn(0); 2732 } 2733 EXTERN_C_END 2734 2735 EXTERN_C_BEGIN 2736 #undef __FUNCT__ 2737 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2738 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2739 { 2740 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2741 PetscErrorCode ierr; 2742 2743 PetscFunctionBegin; 2744 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2745 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2746 PetscFunctionReturn(0); 2747 } 2748 EXTERN_C_END 2749 2750 #include "petscpc.h" 2751 EXTERN_C_BEGIN 2752 #undef __FUNCT__ 2753 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2754 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2755 { 2756 Mat_MPIAIJ *b; 2757 PetscErrorCode ierr; 2758 PetscInt i; 2759 2760 PetscFunctionBegin; 2761 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2762 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2763 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2764 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2765 2766 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2767 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2768 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2769 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2770 if (d_nnz) { 2771 for (i=0; i<B->rmap->n; i++) { 2772 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2773 } 2774 } 2775 if (o_nnz) { 2776 for (i=0; i<B->rmap->n; i++) { 2777 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2778 } 2779 } 2780 b = (Mat_MPIAIJ*)B->data; 2781 2782 if (!B->preallocated) { 2783 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 2784 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2785 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2786 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2787 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2788 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2789 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2790 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2791 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2792 } 2793 2794 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2795 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2796 B->preallocated = PETSC_TRUE; 2797 PetscFunctionReturn(0); 2798 } 2799 EXTERN_C_END 2800 2801 #undef __FUNCT__ 2802 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2803 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2804 { 2805 Mat mat; 2806 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2807 PetscErrorCode ierr; 2808 2809 PetscFunctionBegin; 2810 *newmat = 0; 2811 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2812 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2813 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2814 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2815 a = (Mat_MPIAIJ*)mat->data; 2816 2817 mat->factor = matin->factor; 2818 mat->rmap->bs = matin->rmap->bs; 2819 mat->assembled = PETSC_TRUE; 2820 mat->insertmode = NOT_SET_VALUES; 2821 mat->preallocated = PETSC_TRUE; 2822 2823 a->size = oldmat->size; 2824 a->rank = oldmat->rank; 2825 a->donotstash = oldmat->donotstash; 2826 a->roworiented = oldmat->roworiented; 2827 a->rowindices = 0; 2828 a->rowvalues = 0; 2829 a->getrowactive = PETSC_FALSE; 2830 2831 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2832 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2833 2834 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2835 if (oldmat->colmap) { 2836 #if defined (PETSC_USE_CTABLE) 2837 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2838 #else 2839 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2840 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2841 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2842 #endif 2843 } else a->colmap = 0; 2844 if (oldmat->garray) { 2845 PetscInt len; 2846 len = oldmat->B->cmap->n; 2847 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2848 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2849 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2850 } else a->garray = 0; 2851 2852 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2853 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2854 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2855 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2856 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2857 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2858 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2859 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2860 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2861 *newmat = mat; 2862 
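/*
   Editor's note: illustrative usage sketch, not part of the original source.
   MatMPIAIJSetPreallocation_MPIAIJ() above creates the sequential diagonal and
   off-diagonal blocks and preallocates them.  A caller setting up a parallel AIJ
   matrix typically does

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   where 5 and 2 are example per-row upper bounds for the diagonal and off-diagonal
   blocks; exact per-row counts can be supplied in the d_nnz/o_nnz arrays instead.
*/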
PetscFunctionReturn(0); 2863 } 2864 2865 #include "petscsys.h" 2866 2867 #undef __FUNCT__ 2868 #define __FUNCT__ "MatLoad_MPIAIJ" 2869 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2870 { 2871 Mat A; 2872 PetscScalar *vals,*svals; 2873 MPI_Comm comm = ((PetscObject)viewer)->comm; 2874 MPI_Status status; 2875 PetscErrorCode ierr; 2876 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2877 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2878 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2879 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2880 PetscInt cend,cstart,n,*rowners; 2881 int fd; 2882 2883 PetscFunctionBegin; 2884 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2885 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2886 if (!rank) { 2887 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2888 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2889 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2890 } 2891 2892 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2893 M = header[1]; N = header[2]; 2894 /* determine ownership of all rows */ 2895 m = M/size + ((M % size) > rank); 2896 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2897 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2898 2899 /* First process needs enough room for process with most rows */ 2900 if (!rank) { 2901 mmax = rowners[1]; 2902 for (i=2; i<size; i++) { 2903 mmax = PetscMax(mmax,rowners[i]); 2904 } 2905 } else mmax = m; 2906 2907 rowners[0] = 0; 2908 for (i=2; i<=size; i++) { 2909 rowners[i] += rowners[i-1]; 2910 } 2911 rstart = rowners[rank]; 2912 rend = rowners[rank+1]; 2913 2914 /* distribute row lengths to all processors */ 2915 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2916 if (!rank) { 2917 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2918 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2919 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2920 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2921 for (j=0; j<m; j++) { 2922 procsnz[0] += ourlens[j]; 2923 } 2924 for (i=1; i<size; i++) { 2925 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2926 /* calculate the number of nonzeros on each processor */ 2927 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2928 procsnz[i] += rowlengths[j]; 2929 } 2930 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2931 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2932 } 2933 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2934 } else { 2935 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2936 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2937 } 2938 2939 if (!rank) { 2940 /* determine max buffer needed and allocate it */ 2941 maxnz = 0; 2942 for (i=0; i<size; i++) { 2943 maxnz = PetscMax(maxnz,procsnz[i]); 2944 } 2945 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2946 2947 /* read in my part of the matrix column indices */ 2948 nz = procsnz[0]; 2949 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2950 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2951 2952 /* read in every one elses and ship off */ 2953 for (i=1; i<size; i++) { 2954 nz = procsnz[i]; 2955 ierr = 
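/*
   Editor's note: illustrative usage sketch, not part of the original source.  This
   routine is reached through MatLoad() with a binary viewer; in this version of PETSc
   the caller passes the desired type and receives a newly created matrix.  The file
   name is a placeholder for a file written earlier with MatView() on a binary viewer:

      PetscViewer viewer;
      Mat         A;
      ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/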
PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2956 mpicnt = PetscMPIIntCast(nz); 2957 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2958 } 2959 ierr = PetscFree(cols);CHKERRQ(ierr); 2960 } else { 2961 /* determine buffer space needed for message */ 2962 nz = 0; 2963 for (i=0; i<m; i++) { 2964 nz += ourlens[i]; 2965 } 2966 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2967 2968 /* receive message of column indices*/ 2969 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2970 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2971 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2972 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2973 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2974 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2975 } 2976 2977 /* determine column ownership if matrix is not square */ 2978 if (N != M) { 2979 n = N/size + ((N % size) > rank); 2980 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2981 cstart = cend - n; 2982 } else { 2983 cstart = rstart; 2984 cend = rend; 2985 n = cend - cstart; 2986 } 2987 2988 /* loop over local rows, determining number of off diagonal entries */ 2989 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2990 jj = 0; 2991 for (i=0; i<m; i++) { 2992 for (j=0; j<ourlens[i]; j++) { 2993 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2994 jj++; 2995 } 2996 } 2997 2998 /* create our matrix */ 2999 for (i=0; i<m; i++) { 3000 ourlens[i] -= offlens[i]; 3001 } 3002 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 3003 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 3004 ierr = MatSetType(A,type);CHKERRQ(ierr); 3005 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 3006 3007 for (i=0; i<m; i++) { 3008 ourlens[i] += offlens[i]; 3009 } 3010 3011 if (!rank) { 3012 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3013 3014 /* read in my part of the matrix numerical values */ 3015 nz = procsnz[0]; 3016 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3017 3018 /* insert into matrix */ 3019 jj = rstart; 3020 smycols = mycols; 3021 svals = vals; 3022 for (i=0; i<m; i++) { 3023 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3024 smycols += ourlens[i]; 3025 svals += ourlens[i]; 3026 jj++; 3027 } 3028 3029 /* read in other processors and ship out */ 3030 for (i=1; i<size; i++) { 3031 nz = procsnz[i]; 3032 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3033 mpicnt = PetscMPIIntCast(nz); 3034 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3035 } 3036 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3037 } else { 3038 /* receive numeric values */ 3039 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3040 3041 /* receive message of values*/ 3042 mpicnt = PetscMPIIntCast(nz); 3043 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3044 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3045 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 3046 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible 
negative value %d, expected %d",mpimaxnz,mpicnt);} 3047 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3048 3049 /* insert into matrix */ 3050 jj = rstart; 3051 smycols = mycols; 3052 svals = vals; 3053 for (i=0; i<m; i++) { 3054 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3055 smycols += ourlens[i]; 3056 svals += ourlens[i]; 3057 jj++; 3058 } 3059 } 3060 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3061 ierr = PetscFree(vals);CHKERRQ(ierr); 3062 ierr = PetscFree(mycols);CHKERRQ(ierr); 3063 ierr = PetscFree(rowners);CHKERRQ(ierr); 3064 3065 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3066 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3067 *newmat = A; 3068 PetscFunctionReturn(0); 3069 } 3070 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3073 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3074 { 3075 PetscErrorCode ierr; 3076 IS iscol_local; 3077 PetscInt csize; 3078 3079 PetscFunctionBegin; 3080 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3081 if (call == MAT_REUSE_MATRIX) { 3082 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3083 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3084 } else { 3085 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3086 } 3087 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3088 if (call == MAT_INITIAL_MATRIX) { 3089 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3090 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3091 } 3092 PetscFunctionReturn(0); 3093 } 3094 3095 #undef __FUNCT__ 3096 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3097 /* 3098 Not great since it makes two copies of the submatrix, first an SeqAIJ 3099 in local and then by concatenating the local matrices the end result. 3100 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3101 3102 Note: This requires a sequential iscol with all indices. 
3103 */ 3104 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3105 { 3106 PetscErrorCode ierr; 3107 PetscMPIInt rank,size; 3108 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3109 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3110 Mat *local,M,Mreuse; 3111 MatScalar *vwork,*aa; 3112 MPI_Comm comm = ((PetscObject)mat)->comm; 3113 Mat_SeqAIJ *aij; 3114 3115 3116 PetscFunctionBegin; 3117 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3118 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3119 3120 if (call == MAT_REUSE_MATRIX) { 3121 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3122 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3123 local = &Mreuse; 3124 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3125 } else { 3126 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3127 Mreuse = *local; 3128 ierr = PetscFree(local);CHKERRQ(ierr); 3129 } 3130 3131 /* 3132 m - number of local rows 3133 n - number of columns (same on all processors) 3134 rstart - first row in new global matrix generated 3135 */ 3136 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3137 if (call == MAT_INITIAL_MATRIX) { 3138 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3139 ii = aij->i; 3140 jj = aij->j; 3141 3142 /* 3143 Determine the number of non-zeros in the diagonal and off-diagonal 3144 portions of the matrix in order to do correct preallocation 3145 */ 3146 3147 /* first get start and end of "diagonal" columns */ 3148 if (csize == PETSC_DECIDE) { 3149 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3150 if (mglobal == n) { /* square matrix */ 3151 nlocal = m; 3152 } else { 3153 nlocal = n/size + ((n % size) > rank); 3154 } 3155 } else { 3156 nlocal = csize; 3157 } 3158 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3159 rstart = rend - nlocal; 3160 if (rank == size - 1 && rend != n) { 3161 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3162 } 3163 3164 /* next, compute all the lengths */ 3165 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3166 olens = dlens + m; 3167 for (i=0; i<m; i++) { 3168 jend = ii[i+1] - ii[i]; 3169 olen = 0; 3170 dlen = 0; 3171 for (j=0; j<jend; j++) { 3172 if (*jj < rstart || *jj >= rend) olen++; 3173 else dlen++; 3174 jj++; 3175 } 3176 olens[i] = olen; 3177 dlens[i] = dlen; 3178 } 3179 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3180 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3181 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3182 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3183 ierr = PetscFree(dlens);CHKERRQ(ierr); 3184 } else { 3185 PetscInt ml,nl; 3186 3187 M = *newmat; 3188 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3189 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3190 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3191 /* 3192 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3193 rather than the slower MatSetValues(). 
3194 */ 3195 M->was_assembled = PETSC_TRUE; 3196 M->assembled = PETSC_FALSE; 3197 } 3198 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3199 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3200 ii = aij->i; 3201 jj = aij->j; 3202 aa = aij->a; 3203 for (i=0; i<m; i++) { 3204 row = rstart + i; 3205 nz = ii[i+1] - ii[i]; 3206 cwork = jj; jj += nz; 3207 vwork = aa; aa += nz; 3208 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3209 } 3210 3211 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3212 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3213 *newmat = M; 3214 3215 /* save submatrix used in processor for next request */ 3216 if (call == MAT_INITIAL_MATRIX) { 3217 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3218 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3219 } 3220 3221 PetscFunctionReturn(0); 3222 } 3223 3224 EXTERN_C_BEGIN 3225 #undef __FUNCT__ 3226 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3227 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3228 { 3229 PetscInt m,cstart, cend,j,nnz,i,d; 3230 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3231 const PetscInt *JJ; 3232 PetscScalar *values; 3233 PetscErrorCode ierr; 3234 3235 PetscFunctionBegin; 3236 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3237 3238 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3239 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3240 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3241 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3242 m = B->rmap->n; 3243 cstart = B->cmap->rstart; 3244 cend = B->cmap->rend; 3245 rstart = B->rmap->rstart; 3246 3247 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3248 o_nnz = d_nnz + m; 3249 3250 #if defined(PETSC_USE_DEBUG) 3251 for (i=0; i<m; i++) { 3252 nnz = Ii[i+1]- Ii[i]; 3253 JJ = J + Ii[i]; 3254 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3255 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3256 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3257 for (j=1; j<nnz; j++) { 3258 if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3259 } 3260 } 3261 #endif 3262 3263 for (i=0; i<m; i++) { 3264 nnz = Ii[i+1]- Ii[i]; 3265 JJ = J + Ii[i]; 3266 nnz_max = PetscMax(nnz_max,nnz); 3267 for (j=0; j<nnz; j++) { 3268 if (*JJ >= cstart) break; 3269 JJ++; 3270 } 3271 d = 0; 3272 for (; j<nnz; j++) { 3273 if (*JJ++ >= cend) break; 3274 d++; 3275 } 3276 d_nnz[i] = d; 3277 o_nnz[i] = nnz - d; 3278 } 3279 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3280 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3281 3282 if (v) values = (PetscScalar*)v; 3283 else { 3284 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3285 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3286 } 3287 3288 for (i=0; i<m; i++) { 3289 ii = i + rstart; 3290 nnz = Ii[i+1]- Ii[i]; 3291 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3292 } 3293 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3294 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3295 3296 if (!v) { 3297 ierr = PetscFree(values);CHKERRQ(ierr); 3298 } 3299 PetscFunctionReturn(0); 3300 } 3301 EXTERN_C_END 3302 3303 #undef __FUNCT__ 3304 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3305 /*@ 3306 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3307 (the default parallel PETSc format). 3308 3309 Collective on MPI_Comm 3310 3311 Input Parameters: 3312 + B - the matrix 3313 . i - the indices into j for the start of each local row (starts with zero) 3314 . j - the column indices for each local row (starts with zero); these must be sorted for each row 3315 - v - optional values in the matrix 3316 3317 Level: developer 3318 3319 Notes: 3320 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3321 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3322 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3323 3324 The i and j indices are 0 based, and the entries of i are offsets into the local j array. 3325 3326 The format used for the sparse matrix input is equivalent to a 3327 row-major ordering, i.e., for the following matrix the input data expected is 3328 as shown: 3329 3330 1 0 0 3331 2 0 3 P0 3332 ------- 3333 4 5 6 P1 3334 3335 Process0 [P0]: rows_owned=[0,1] 3336 i = {0,1,3} [size = nrow+1 = 2+1] 3337 j = {0,0,2} [size = nz = 3] 3338 v = {1,2,3} [size = nz = 3] 3339 3340 Process1 [P1]: rows_owned=[2] 3341 i = {0,3} [size = nrow+1 = 1+1] 3342 j = {0,1,2} [size = nz = 3] 3343 v = {4,5,6} [size = nz = 3] 3344 3345 The column indices for each row MUST be sorted. 3346 3347 .keywords: matrix, aij, compressed row, sparse, parallel 3348 3349 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3350 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3351 @*/ 3352 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3353 { 3354 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3355 3356 PetscFunctionBegin; 3357 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3358 if (f) { 3359 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3360 } 3361 PetscFunctionReturn(0); 3362 } 3363 3364 #undef __FUNCT__ 3365 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3366 /*@C 3367 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3368 (the default parallel PETSc format). For good matrix assembly performance 3369 the user should preallocate the matrix storage by setting the parameters 3370 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3371 performance can be increased by more than a factor of 50. 3372 3373 Collective on MPI_Comm 3374 3375 Input Parameters: 3376 + A - the matrix 3377 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3378 (same value is used for all local rows) 3379 . d_nnz - array containing the number of nonzeros in the various rows of the 3380 DIAGONAL portion of the local submatrix (possibly different for each row) 3381 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3382 The size of this array is equal to the number of local rows, i.e. 'm'. 3383 You must leave room for the diagonal entry even if it is zero. 3384 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3385 submatrix (same value is used for all local rows). 3386 - o_nnz - array containing the number of nonzeros in the various rows of the 3387 OFF-DIAGONAL portion of the local submatrix (possibly different for 3388 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3389 structure. The size of this array is equal to the number 3390 of local rows, i.e. 'm'. 3391 3392 If the *_nnz parameter is given then the *_nz parameter is ignored. 3393 3394 The AIJ format (also called the Yale sparse matrix format or 3395 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3396 storage. The stored row and column indices begin with zero. See the users manual for details. 3397 3398 The parallel matrix is partitioned such that the first m0 rows belong to 3399 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3400 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 3401 3402 The DIAGONAL portion of the local submatrix of a processor can be defined 3403 as the submatrix which is obtained by extracting the part corresponding 3404 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3405 first row that belongs to the processor, and r2 is the last row belonging 3406 to this processor. This is a square mxm matrix. The remaining portion 3407 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3408 3409 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3410 3411 You can call MatGetInfo() to get information on how effective the preallocation was; 3412 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3413 You can also run with the option -info and look for messages with the string 3414 malloc in them to see if additional memory allocation was needed. 3415 3416 Example usage: 3417 3418 Consider the following 8x8 matrix with 34 non-zero values that is 3419 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3420 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3421 as follows: 3422 3423 .vb 3424 1 2 0 | 0 3 0 | 0 4 3425 Proc0 0 5 6 | 7 0 0 | 8 0 3426 9 0 10 | 11 0 0 | 12 0 3427 ------------------------------------- 3428 13 0 14 | 15 16 17 | 0 0 3429 Proc1 0 18 0 | 19 20 21 | 0 0 3430 0 0 0 | 22 23 0 | 24 0 3431 ------------------------------------- 3432 Proc2 25 26 27 | 0 0 28 | 29 0 3433 30 0 0 | 31 32 33 | 0 34 3434 .ve 3435 3436 This can be represented as a collection of submatrices as: 3437 3438 .vb 3439 A B C 3440 D E F 3441 G H I 3442 .ve 3443 3444 Where the submatrices A,B,C are owned by proc0, D,E,F are 3445 owned by proc1, G,H,I are owned by proc2. 3446 3447 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3448 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3449 The 'M','N' parameters are 8,8, and have the same values on all procs. 3450 3451 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3452 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3453 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3454 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3455 part as SeqAIJ matrices; e.g. proc1 will store [E] as a SeqAIJ 3456 matrix, and [DF] as another SeqAIJ matrix.
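For orientation, a minimal call sequence using this routine (a sketch only; comm, m, n and the count arrays are placeholders, not values taken from the example above) looks like:
.vb
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   How to choose the d_nz/d_nnz and o_nz/o_nnz arguments for the example above is discussed next.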
3457 3458 When d_nz, o_nz parameters are specified, d_nz storage elements are 3459 allocated for every row of the local diagonal submatrix, and o_nz 3460 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3461 One way to choose d_nz and o_nz is to use the max nonzeros per local 3462 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3463 In this case, the values of d_nz,o_nz are: 3464 .vb 3465 proc0 : dnz = 2, o_nz = 2 3466 proc1 : dnz = 3, o_nz = 2 3467 proc2 : dnz = 1, o_nz = 4 3468 .ve 3469 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3470 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3471 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3472 34 values. 3473 3474 When d_nnz, o_nnz parameters are specified, the storage is specified 3475 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3476 In the above case, the values for d_nnz,o_nnz are: 3477 .vb 3478 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3479 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3480 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3481 .ve 3482 Here the space allocated is the sum of all the above values, i.e. 34, and 3483 hence pre-allocation is perfect. 3484 3485 Level: intermediate 3486 3487 .keywords: matrix, aij, compressed row, sparse, parallel 3488 3489 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3490 MPIAIJ, MatGetInfo() 3491 @*/ 3492 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3493 { 3494 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3495 3496 PetscFunctionBegin; 3497 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3498 if (f) { 3499 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3500 } 3501 PetscFunctionReturn(0); 3502 } 3503 3504 #undef __FUNCT__ 3505 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3506 /*@ 3507 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 3508 CSR format. 3509 3510 Collective on MPI_Comm 3511 3512 Input Parameters: 3513 + comm - MPI communicator 3514 . m - number of local rows (Cannot be PETSC_DECIDE) 3515 . n - This value should be the same as the local size used in creating the 3516 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3517 calculated if N is given) For square matrices n is almost always m. 3518 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3519 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3520 . i - row indices 3521 . j - column indices 3522 - a - matrix values 3523 3524 Output Parameter: 3525 . mat - the matrix 3526 3527 Level: intermediate 3528 3529 Notes: 3530 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3531 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3532 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3533 3534 The i and j indices are 0 based, and the entries of i are offsets into the local j array. 3535 3536 The format used for the sparse matrix input is equivalent to a 3537 row-major ordering,
i.e., for the following matrix, the input data expected is 3538 as shown: 3539 3540 1 0 0 3541 2 0 3 P0 3542 ------- 3543 4 5 6 P1 3544 3545 Process0 [P0]: rows_owned=[0,1] 3546 i = {0,1,3} [size = nrow+1 = 2+1] 3547 j = {0,0,2} [size = nz = 3] 3548 v = {1,2,3} [size = nz = 3] 3549 3550 Process1 [P1]: rows_owned=[2] 3551 i = {0,3} [size = nrow+1 = 1+1] 3552 j = {0,1,2} [size = nz = 3] 3553 v = {4,5,6} [size = nz = 3] 3554 3555 .keywords: matrix, aij, compressed row, sparse, parallel 3556 3557 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3558 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3559 @*/ 3560 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3561 { 3562 PetscErrorCode ierr; 3563 3564 PetscFunctionBegin; 3565 if (i[0]) { 3566 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3567 } 3568 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative"); 3569 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3570 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3571 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3572 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3573 PetscFunctionReturn(0); 3574 } 3575 3576 #undef __FUNCT__ 3577 #define __FUNCT__ "MatCreateMPIAIJ" 3578 /*@C 3579 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3580 (the default parallel PETSc format). For good matrix assembly performance 3581 the user should preallocate the matrix storage by setting the parameters 3582 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3583 performance can be increased by more than a factor of 50. 3584 3585 Collective on MPI_Comm 3586 3587 Input Parameters: 3588 + comm - MPI communicator 3589 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3590 This value should be the same as the local size used in creating the 3591 y vector for the matrix-vector product y = Ax. 3592 . n - This value should be the same as the local size used in creating the 3593 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3594 calculated if N is given) For square matrices n is almost always m. 3595 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3596 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3597 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3598 (same value is used for all local rows) 3599 . d_nnz - array containing the number of nonzeros in the various rows of the 3600 DIAGONAL portion of the local submatrix (possibly different for each row) 3601 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3602 The size of this array is equal to the number of local rows, i.e. 'm'. 3603 You must leave room for the diagonal entry even if it is zero. 3604 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3605 submatrix (same value is used for all local rows). 3606 - o_nnz - array containing the number of nonzeros in the various rows of the 3607 OFF-DIAGONAL portion of the local submatrix (possibly different for 3608 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3609 structure. The size of this array is equal to the number 3610 of local rows, i.e. 'm'.
3611 3612 Output Parameter: 3613 . A - the matrix 3614 3615 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3616 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3617 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3618 3619 Notes: 3620 If the *_nnz parameter is given then the *_nz parameter is ignored. 3621 3622 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 3623 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3624 storage requirements for this matrix. 3625 3626 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3627 processor then it must be used on all processors that share the object for 3628 that argument. 3629 3630 The user MUST specify either the local or global matrix dimensions 3631 (possibly both). 3632 3633 The parallel matrix is partitioned across processors such that the 3634 first m0 rows belong to process 0, the next m1 rows belong to 3635 process 1, the next m2 rows belong to process 2, etc., where 3636 m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores 3637 values corresponding to an [m x N] submatrix. 3638 3639 The columns are logically partitioned with the n0 columns belonging 3640 to the 0th partition, the next n1 columns belonging to the next 3641 partition, etc., where n0,n1,n2... are the input parameter 'n'. 3642 3643 The DIAGONAL portion of the local submatrix on any given processor 3644 is the submatrix corresponding to the rows and columns m,n 3645 corresponding to the given processor, i.e. the diagonal matrix on 3646 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 3647 etc. The remaining portion of the local submatrix [m x (N-n)] 3648 constitutes the OFF-DIAGONAL portion. The example below better 3649 illustrates this concept. 3650 3651 For a square global matrix we define each processor's diagonal portion 3652 to be its local rows and the corresponding columns (a square submatrix); 3653 each processor's off-diagonal portion encompasses the remainder of the 3654 local matrix (a rectangular submatrix). 3655 3656 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3657 3658 When calling this routine with a single process communicator, a matrix of 3659 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3660 type of communicator, use the construction mechanism: 3661 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3662 3663 By default, this format uses inodes (identical nodes) when possible. 3664 We search for consecutive rows with the same nonzero structure, thereby 3665 reusing matrix information to achieve increased efficiency. 3666 3667 Options Database Keys: 3668 + -mat_no_inode - Do not use inodes 3669 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3670 - -mat_aij_oneindex - Internally use indexing starting at 1 3671 rather than 0. Note that when calling MatSetValues(), 3672 the user still MUST index entries starting at 0! 3673 3674 3675 Example usage: 3676 3677 Consider the following 8x8 matrix with 34 non-zero values that is 3678 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3679 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 3680 as follows: 3681 3682 .vb 3683 1 2 0 | 0 3 0 | 0 4 3684 Proc0 0 5 6 | 7 0 0 | 8 0 3685 9 0 10 | 11 0 0 | 12 0 3686 ------------------------------------- 3687 13 0 14 | 15 16 17 | 0 0 3688 Proc1 0 18 0 | 19 20 21 | 0 0 3689 0 0 0 | 22 23 0 | 24 0 3690 ------------------------------------- 3691 Proc2 25 26 27 | 0 0 28 | 29 0 3692 30 0 0 | 31 32 33 | 0 34 3693 .ve 3694 3695 This can be represented as a collection of submatrices as: 3696 3697 .vb 3698 A B C 3699 D E F 3700 G H I 3701 .ve 3702 3703 Where the submatrices A,B,C are owned by proc0, D,E,F are 3704 owned by proc1, G,H,I are owned by proc2. 3705 3706 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3707 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3708 The 'M','N' parameters are 8,8, and have the same values on all procs. 3709 3710 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3711 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3712 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3713 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3714 part as SeqAIJ matrices; e.g. proc1 will store [E] as a SeqAIJ 3715 matrix, and [DF] as another SeqAIJ matrix. 3716 3717 When d_nz, o_nz parameters are specified, d_nz storage elements are 3718 allocated for every row of the local diagonal submatrix, and o_nz 3719 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3720 One way to choose d_nz and o_nz is to use the max nonzeros per local 3721 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3722 In this case, the values of d_nz,o_nz are: 3723 .vb 3724 proc0 : dnz = 2, o_nz = 2 3725 proc1 : dnz = 3, o_nz = 2 3726 proc2 : dnz = 1, o_nz = 4 3727 .ve 3728 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3729 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3730 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3731 34 values. 3732 3733 When d_nnz, o_nnz parameters are specified, the storage is specified 3734 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3735 In the above case, the values for d_nnz,o_nnz are: 3736 .vb 3737 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3738 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3739 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3740 .ve 3741 Here the space allocated is the sum of all the above values, i.e. 34, and 3742 hence pre-allocation is perfect.
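As an illustration only (a sketch; comm and A are placeholder names), the call on proc1 of the example above, using the per-row counts just listed, could be written as:
.vb
      PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
      ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve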
3743 3744 Level: intermediate 3745 3746 .keywords: matrix, aij, compressed row, sparse, parallel 3747 3748 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3749 MPIAIJ, MatCreateMPIAIJWithArrays() 3750 @*/ 3751 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3752 { 3753 PetscErrorCode ierr; 3754 PetscMPIInt size; 3755 3756 PetscFunctionBegin; 3757 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3758 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3759 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3760 if (size > 1) { 3761 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3762 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3763 } else { 3764 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3765 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3766 } 3767 PetscFunctionReturn(0); 3768 } 3769 3770 #undef __FUNCT__ 3771 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3772 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3773 { 3774 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3775 3776 PetscFunctionBegin; 3777 *Ad = a->A; 3778 *Ao = a->B; 3779 *colmap = a->garray; 3780 PetscFunctionReturn(0); 3781 } 3782 3783 #undef __FUNCT__ 3784 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3785 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3786 { 3787 PetscErrorCode ierr; 3788 PetscInt i; 3789 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3790 3791 PetscFunctionBegin; 3792 if (coloring->ctype == IS_COLORING_GLOBAL) { 3793 ISColoringValue *allcolors,*colors; 3794 ISColoring ocoloring; 3795 3796 /* set coloring for diagonal portion */ 3797 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3798 3799 /* set coloring for off-diagonal portion */ 3800 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3801 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3802 for (i=0; i<a->B->cmap->n; i++) { 3803 colors[i] = allcolors[a->garray[i]]; 3804 } 3805 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3806 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3807 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3808 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3809 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3810 ISColoringValue *colors; 3811 PetscInt *larray; 3812 ISColoring ocoloring; 3813 3814 /* set coloring for diagonal portion */ 3815 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3816 for (i=0; i<a->A->cmap->n; i++) { 3817 larray[i] = i + A->cmap->rstart; 3818 } 3819 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3820 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3821 for (i=0; i<a->A->cmap->n; i++) { 3822 colors[i] = coloring->colors[larray[i]]; 3823 } 3824 ierr = PetscFree(larray);CHKERRQ(ierr); 3825 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3826 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3827 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3828 3829 /* set coloring for off-diagonal portion */ 3830 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3831 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3832 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3833 for (i=0; i<a->B->cmap->n; i++) { 3834 colors[i] = coloring->colors[larray[i]]; 3835 } 3836 ierr = PetscFree(larray);CHKERRQ(ierr); 3837 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3838 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3839 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3840 } else { 3841 SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype); 3842 } 3843 3844 PetscFunctionReturn(0); 3845 } 3846 3847 #if defined(PETSC_HAVE_ADIC) 3848 #undef __FUNCT__ 3849 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3850 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3851 { 3852 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3853 PetscErrorCode ierr; 3854 3855 PetscFunctionBegin; 3856 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3857 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3858 PetscFunctionReturn(0); 3859 } 3860 #endif 3861 3862 #undef __FUNCT__ 3863 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3864 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3865 { 3866 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3867 PetscErrorCode ierr; 3868 3869 PetscFunctionBegin; 3870 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3871 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3872 PetscFunctionReturn(0); 3873 } 3874 3875 #undef __FUNCT__ 3876 #define __FUNCT__ "MatMerge" 3877 /*@ 3878 MatMerge - Creates a single large PETSc matrix by concatenating sequential 3879 matrices from each processor 3880 3881 Collective on MPI_Comm 3882 3883 Input Parameters: 3884 + comm - the communicator the parallel matrix will live on 3885 . inmat - the input sequential matrix (one per process) 3886 . n - number of local columns (or PETSC_DECIDE) 3887 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3888 3889 Output Parameter: 3890 . outmat - the parallel matrix generated 3891 3892 Level: advanced 3893 3894 Notes: The number of columns of the matrix on EACH processor MUST be the same.
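A typical use (a sketch; inmat and outmat are placeholder variable names) is:
.vb
      ierr = MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve
   Note that, as implemented below, the input matrix inmat is destroyed once the parallel matrix has been assembled.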
3895 3896 @*/ 3897 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3898 { 3899 PetscErrorCode ierr; 3900 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3901 PetscInt *indx; 3902 PetscScalar *values; 3903 3904 PetscFunctionBegin; 3905 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3906 if (scall == MAT_INITIAL_MATRIX){ 3907 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3908 if (n == PETSC_DECIDE){ 3909 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3910 } 3911 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3912 rstart -= m; 3913 3914 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3915 for (i=0;i<m;i++) { 3916 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3917 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3918 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3919 } 3920 /* This routine will ONLY return MPIAIJ type matrix */ 3921 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3922 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3923 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3924 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3925 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3926 3927 } else if (scall == MAT_REUSE_MATRIX){ 3928 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3929 } else { 3930 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3931 } 3932 3933 for (i=0;i<m;i++) { 3934 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3935 Ii = i + rstart; 3936 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3937 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3938 } 3939 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3940 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3941 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3942 3943 PetscFunctionReturn(0); 3944 } 3945 3946 #undef __FUNCT__ 3947 #define __FUNCT__ "MatFileSplit" 3948 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3949 { 3950 PetscErrorCode ierr; 3951 PetscMPIInt rank; 3952 PetscInt m,N,i,rstart,nnz; 3953 size_t len; 3954 const PetscInt *indx; 3955 PetscViewer out; 3956 char *name; 3957 Mat B; 3958 const PetscScalar *values; 3959 3960 PetscFunctionBegin; 3961 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3962 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3963 /* Should this be the type of the diagonal block of A? 
*/ 3964 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3965 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3966 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3967 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3968 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3969 for (i=0;i<m;i++) { 3970 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3971 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3972 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3973 } 3974 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3975 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3976 3977 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3978 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3979 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3980 sprintf(name,"%s.%d",outfile,rank); 3981 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3982 ierr = PetscFree(name);CHKERRQ(ierr); 3983 ierr = MatView(B,out);CHKERRQ(ierr); 3984 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3985 ierr = MatDestroy(B);CHKERRQ(ierr); 3986 PetscFunctionReturn(0); 3987 } 3988 3989 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3990 #undef __FUNCT__ 3991 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3992 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3993 { 3994 PetscErrorCode ierr; 3995 Mat_Merge_SeqsToMPI *merge = PETSC_NULL; /* initialized so the PetscFree() below is safe when no container is attached */ 3996 PetscContainer container; 3997 3998 PetscFunctionBegin; 3999 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4000 if (container) { 4001 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4002 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4003 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4004 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4005 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4006 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4007 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4008 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4009 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4010 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4011 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4012 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4013 4014 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4015 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4016 } 4017 ierr = PetscFree(merge);CHKERRQ(ierr); 4018 4019 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4020 PetscFunctionReturn(0); 4021 } 4022 4023 #include "../src/mat/utils/freespace.h" 4024 #include "petscbt.h" 4025 4026 #undef __FUNCT__ 4027 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4028 /*@C 4029 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential 4030 matrices from each processor 4031 4032 Collective on MPI_Comm 4033 4034 Input Parameters: 4035 + comm - the communicator the parallel matrix will live on 4036 . seqmat - the input sequential matrix (one per process) 4037 . m - number of local rows (or PETSC_DECIDE) 4038 . n - number of local columns (or PETSC_DECIDE) 4039 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4040 4041 Output Parameter: 4042 . mpimat - the parallel matrix generated 4043 4044 Level: advanced 4045 4046 Notes: 4047 The dimensions of the sequential matrix on each processor MUST be the same. 4048 The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be 4049 destroyed when mpimat is destroyed.
Call PetscObjectQuery() to access seqmat. 4050 @*/ 4051 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4052 { 4053 PetscErrorCode ierr; 4054 MPI_Comm comm=((PetscObject)mpimat)->comm; 4055 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4056 PetscMPIInt size,rank,taga,*len_s; 4057 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4058 PetscInt proc,m; 4059 PetscInt **buf_ri,**buf_rj; 4060 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4061 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4062 MPI_Request *s_waits,*r_waits; 4063 MPI_Status *status; 4064 MatScalar *aa=a->a; 4065 MatScalar **abuf_r,*ba_i; 4066 Mat_Merge_SeqsToMPI *merge; 4067 PetscContainer container; 4068 4069 PetscFunctionBegin; 4070 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4071 4072 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4073 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4074 4075 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4076 if (container) { 4077 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4078 } 4079 bi = merge->bi; 4080 bj = merge->bj; 4081 buf_ri = merge->buf_ri; 4082 buf_rj = merge->buf_rj; 4083 4084 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4085 owners = merge->rowmap->range; 4086 len_s = merge->len_s; 4087 4088 /* send and recv matrix values */ 4089 /*-----------------------------*/ 4090 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4091 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4092 4093 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4094 for (proc=0,k=0; proc<size; proc++){ 4095 if (!len_s[proc]) continue; 4096 i = owners[proc]; 4097 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4098 k++; 4099 } 4100 4101 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4102 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4103 ierr = PetscFree(status);CHKERRQ(ierr); 4104 4105 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4106 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4107 4108 /* insert mat values of mpimat */ 4109 /*----------------------------*/ 4110 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4111 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4112 nextrow = buf_ri_k + merge->nrecv; 4113 nextai = nextrow + merge->nrecv; 4114 4115 for (k=0; k<merge->nrecv; k++){ 4116 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4117 nrows = *(buf_ri_k[k]); 4118 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4119 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4120 } 4121 4122 /* set values of ba */ 4123 m = merge->rowmap->n; 4124 for (i=0; i<m; i++) { 4125 arow = owners[rank] + i; 4126 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4127 bnzi = bi[i+1] - bi[i]; 4128 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4129 4130 /* add local non-zero vals of this proc's seqmat into ba */ 4131 anzi = ai[arow+1] - ai[arow]; 4132 aj = a->j + ai[arow]; 4133 aa = a->a + ai[arow]; 4134 nextaj = 0; 4135 for (j=0; nextaj<anzi; j++){ 4136 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4137 ba_i[j] += aa[nextaj++]; 4138 } 4139 } 4140 4141 /* 
add received vals into ba */ 4142 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4143 /* i-th row */ 4144 if (i == *nextrow[k]) { 4145 anzi = *(nextai[k]+1) - *nextai[k]; 4146 aj = buf_rj[k] + *(nextai[k]); 4147 aa = abuf_r[k] + *(nextai[k]); 4148 nextaj = 0; 4149 for (j=0; nextaj<anzi; j++){ 4150 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4151 ba_i[j] += aa[nextaj++]; 4152 } 4153 } 4154 nextrow[k]++; nextai[k]++; 4155 } 4156 } 4157 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4158 } 4159 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4160 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4161 4162 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4163 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4164 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4165 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4166 PetscFunctionReturn(0); 4167 } 4168 4169 #undef __FUNCT__ 4170 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4171 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4172 { 4173 PetscErrorCode ierr; 4174 Mat B_mpi; 4175 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4176 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4177 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4178 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4179 PetscInt len,proc,*dnz,*onz; 4180 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4181 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4182 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4183 MPI_Status *status; 4184 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4185 PetscBT lnkbt; 4186 Mat_Merge_SeqsToMPI *merge; 4187 PetscContainer container; 4188 4189 PetscFunctionBegin; 4190 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4191 4192 /* make sure it is a PETSc comm */ 4193 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4194 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4195 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4196 4197 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4198 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4199 4200 /* determine row ownership */ 4201 /*---------------------------------------------------------*/ 4202 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4203 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4204 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4205 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4206 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4207 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4208 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4209 4210 m = merge->rowmap->n; 4211 M = merge->rowmap->N; 4212 owners = merge->rowmap->range; 4213 4214 /* determine the number of messages to send, their lengths */ 4215 /*---------------------------------------------------------*/ 4216 len_s = merge->len_s; 4217 4218 len = 0; /* length of buf_si[] */ 4219 merge->nsend = 0; 4220 for (proc=0; proc<size; proc++){ 4221 len_si[proc] = 0; 4222 if (proc == rank){ 4223 len_s[proc] = 0; 4224 } else { 4225 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4226 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4227 } 4228 if (len_s[proc]) { 4229 merge->nsend++; 4230 nrows = 0; 4231 for 
(i=owners[proc]; i<owners[proc+1]; i++){ 4232 if (ai[i+1] > ai[i]) nrows++; 4233 } 4234 len_si[proc] = 2*(nrows+1); 4235 len += len_si[proc]; 4236 } 4237 } 4238 4239 /* determine the number and length of messages to receive for ij-structure */ 4240 /*-------------------------------------------------------------------------*/ 4241 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4242 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4243 4244 /* post the Irecv of j-structure */ 4245 /*-------------------------------*/ 4246 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4247 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4248 4249 /* post the Isend of j-structure */ 4250 /*--------------------------------*/ 4251 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4252 sj_waits = si_waits + merge->nsend; 4253 4254 for (proc=0, k=0; proc<size; proc++){ 4255 if (!len_s[proc]) continue; 4256 i = owners[proc]; 4257 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4258 k++; 4259 } 4260 4261 /* receives and sends of j-structure are complete */ 4262 /*------------------------------------------------*/ 4263 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4264 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4265 4266 /* send and recv i-structure */ 4267 /*---------------------------*/ 4268 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4269 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4270 4271 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4272 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4273 for (proc=0,k=0; proc<size; proc++){ 4274 if (!len_s[proc]) continue; 4275 /* form outgoing message for i-structure: 4276 buf_si[0]: nrows to be sent 4277 [1:nrows]: row index (global) 4278 [nrows+1:2*nrows+1]: i-structure index 4279 */ 4280 /*-------------------------------------------*/ 4281 nrows = len_si[proc]/2 - 1; 4282 buf_si_i = buf_si + nrows+1; 4283 buf_si[0] = nrows; 4284 buf_si_i[0] = 0; 4285 nrows = 0; 4286 for (i=owners[proc]; i<owners[proc+1]; i++){ 4287 anzi = ai[i+1] - ai[i]; 4288 if (anzi) { 4289 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4290 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4291 nrows++; 4292 } 4293 } 4294 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4295 k++; 4296 buf_si += len_si[proc]; 4297 } 4298 4299 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4300 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4301 4302 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4303 for (i=0; i<merge->nrecv; i++){ 4304 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4305 } 4306 4307 ierr = PetscFree(len_si);CHKERRQ(ierr); 4308 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4309 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4310 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4311 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4312 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4313 ierr = PetscFree(status);CHKERRQ(ierr); 4314 4315 /* compute a 
local seq matrix in each processor */ 4316 /*----------------------------------------------*/ 4317 /* allocate bi array and free space for accumulating nonzero column info */ 4318 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4319 bi[0] = 0; 4320 4321 /* create and initialize a linked list */ 4322 nlnk = N+1; 4323 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4324 4325 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4326 len = 0; 4327 len = ai[owners[rank+1]] - ai[owners[rank]]; 4328 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4329 current_space = free_space; 4330 4331 /* determine symbolic info for each local row */ 4332 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4333 nextrow = buf_ri_k + merge->nrecv; 4334 nextai = nextrow + merge->nrecv; 4335 for (k=0; k<merge->nrecv; k++){ 4336 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4337 nrows = *buf_ri_k[k]; 4338 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4339 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */ 4340 } 4341 4342 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4343 len = 0; 4344 for (i=0;i<m;i++) { 4345 bnzi = 0; 4346 /* add local non-zero cols of this proc's seqmat into lnk */ 4347 arow = owners[rank] + i; 4348 anzi = ai[arow+1] - ai[arow]; 4349 aj = a->j + ai[arow]; 4350 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4351 bnzi += nlnk; 4352 /* add received col data into lnk */ 4353 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4354 if (i == *nextrow[k]) { /* i-th row */ 4355 anzi = *(nextai[k]+1) - *nextai[k]; 4356 aj = buf_rj[k] + *nextai[k]; 4357 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4358 bnzi += nlnk; 4359 nextrow[k]++; nextai[k]++; 4360 } 4361 } 4362 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4363 4364 /* if free space is not available, make more free space */ 4365 if (current_space->local_remaining<bnzi) { 4366 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4367 nspacedouble++; 4368 } 4369 /* copy data into free space, then initialize lnk */ 4370 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4371 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4372 4373 current_space->array += bnzi; 4374 current_space->local_used += bnzi; 4375 current_space->local_remaining -= bnzi; 4376 4377 bi[i+1] = bi[i] + bnzi; 4378 } 4379 4380 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4381 4382 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4383 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4384 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4385 4386 /* create symbolic parallel matrix B_mpi */ 4387 /*---------------------------------------*/ 4388 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4389 if (n==PETSC_DECIDE) { 4390 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4391 } else { 4392 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4393 } 4394 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4395 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4396 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4397 4398 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4399 B_mpi->assembled = PETSC_FALSE; 4400 B_mpi->ops->destroy =
MatDestroy_MPIAIJ_SeqsToMPI; 4401 merge->bi = bi; 4402 merge->bj = bj; 4403 merge->buf_ri = buf_ri; 4404 merge->buf_rj = buf_rj; 4405 merge->coi = PETSC_NULL; 4406 merge->coj = PETSC_NULL; 4407 merge->owners_co = PETSC_NULL; 4408 4409 /* attach the supporting struct to B_mpi for reuse */ 4410 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4411 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4412 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4413 *mpimat = B_mpi; 4414 4415 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4416 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4417 PetscFunctionReturn(0); 4418 } 4419 4420 #undef __FUNCT__ 4421 #define __FUNCT__ "MatMerge_SeqsToMPI" 4422 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4423 { 4424 PetscErrorCode ierr; 4425 4426 PetscFunctionBegin; 4427 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4428 if (scall == MAT_INITIAL_MATRIX){ 4429 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4430 } 4431 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4432 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4433 PetscFunctionReturn(0); 4434 } 4435 4436 #undef __FUNCT__ 4437 #define __FUNCT__ "MatGetLocalMat" 4438 /*@ 4439 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4440 4441 Not Collective 4442 4443 Input Parameters: 4444 + A - the matrix 4445 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4446 4447 Output Parameter: 4448 . A_loc - the local sequential matrix generated 4449 4450 Level: developer 4451 4452 @*/ 4453 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4454 { 4455 PetscErrorCode ierr; 4456 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4457 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4458 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4459 MatScalar *aa=a->a,*ba=b->a,*cam; 4460 PetscScalar *ca; 4461 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4462 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4463 4464 PetscFunctionBegin; 4465 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4466 if (scall == MAT_INITIAL_MATRIX){ 4467 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4468 ci[0] = 0; 4469 for (i=0; i<am; i++){ 4470 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4471 } 4472 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4473 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4474 k = 0; 4475 for (i=0; i<am; i++) { 4476 ncols_o = bi[i+1] - bi[i]; 4477 ncols_d = ai[i+1] - ai[i]; 4478 /* off-diagonal portion of A */ 4479 for (jo=0; jo<ncols_o; jo++) { 4480 col = cmap[*bj]; 4481 if (col >= cstart) break; 4482 cj[k] = col; bj++; 4483 ca[k++] = *ba++; 4484 } 4485 /* diagonal portion of A */ 4486 for (j=0; j<ncols_d; j++) { 4487 cj[k] = cstart + *aj++; 4488 ca[k++] = *aa++; 4489 } 4490 /* off-diagonal portion of A */ 4491 for (j=jo; j<ncols_o; j++) { 4492 cj[k] = cmap[*bj++]; 4493 ca[k++] = *ba++; 4494 } 4495 } 4496 /* put together the new matrix */ 4497 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4498 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. 
*/ 4499 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4500 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4501 mat->free_a = PETSC_TRUE; 4502 mat->free_ij = PETSC_TRUE; 4503 mat->nonew = 0; 4504 } else if (scall == MAT_REUSE_MATRIX){ 4505 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4506 ci = mat->i; cj = mat->j; cam = mat->a; 4507 for (i=0; i<am; i++) { 4508 /* off-diagonal portion of A */ 4509 ncols_o = bi[i+1] - bi[i]; 4510 for (jo=0; jo<ncols_o; jo++) { 4511 col = cmap[*bj]; 4512 if (col >= cstart) break; 4513 *cam++ = *ba++; bj++; 4514 } 4515 /* diagonal portion of A */ 4516 ncols_d = ai[i+1] - ai[i]; 4517 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4518 /* off-diagonal portion of A */ 4519 for (j=jo; j<ncols_o; j++) { 4520 *cam++ = *ba++; bj++; 4521 } 4522 } 4523 } else { 4524 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4525 } 4526 4527 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4528 PetscFunctionReturn(0); 4529 } 4530 4531 #undef __FUNCT__ 4532 #define __FUNCT__ "MatGetLocalMatCondensed" 4533 /*@C 4534 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4535 4536 Not Collective 4537 4538 Input Parameters: 4539 + A - the matrix 4540 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4541 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4542 4543 Output Parameter: 4544 . A_loc - the local sequential matrix generated 4545 4546 Level: developer 4547 4548 @*/ 4549 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4550 { 4551 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4552 PetscErrorCode ierr; 4553 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4554 IS isrowa,iscola; 4555 Mat *aloc; 4556 4557 PetscFunctionBegin; 4558 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4559 if (!row){ 4560 start = A->rmap->rstart; end = A->rmap->rend; 4561 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4562 } else { 4563 isrowa = *row; 4564 } 4565 if (!col){ 4566 start = A->cmap->rstart; 4567 cmap = a->garray; 4568 nzA = a->A->cmap->n; 4569 nzB = a->B->cmap->n; 4570 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4571 ncols = 0; 4572 for (i=0; i<nzB; i++) { 4573 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4574 else break; 4575 } 4576 imark = i; 4577 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4578 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4579 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4580 ierr = PetscFree(idx);CHKERRQ(ierr); 4581 } else { 4582 iscola = *col; 4583 } 4584 if (scall != MAT_INITIAL_MATRIX){ 4585 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4586 aloc[0] = *A_loc; 4587 } 4588 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4589 *A_loc = aloc[0]; 4590 ierr = PetscFree(aloc);CHKERRQ(ierr); 4591 if (!row){ 4592 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4593 } 4594 if (!col){ 4595 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4596 } 4597 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4598 PetscFunctionReturn(0); 4599 } 4600 4601 #undef __FUNCT__ 4602 #define __FUNCT__ "MatGetBrowsOfAcols" 4603 /*@C 4604 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4605 4606 Collective on Mat 4607 4608 Input Parameters: 4609 + A,B - the matrices in mpiaij format 4610 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4611 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4612 4613 Output Parameter: 4614 + rowb, colb - index sets of the rows and columns of B that were extracted 4615 . brstart - row index in B_seq at which B's own local rows begin (the following B->rmap->n rows of B_seq are taken from B's local rows) 4616 - B_seq - the sequential matrix generated 4617 4618 Level: developer 4619 4620 @*/ 4621 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4622 { 4623 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4624 PetscErrorCode ierr; 4625 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4626 IS isrowb,iscolb; 4627 Mat *bseq; 4628 4629 PetscFunctionBegin; 4630 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4631 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4632 } 4633 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4634 4635 if (scall == MAT_INITIAL_MATRIX){ 4636 start = A->cmap->rstart; 4637 cmap = a->garray; 4638 nzA = a->A->cmap->n; 4639 nzB = a->B->cmap->n; 4640 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4641 ncols = 0; 4642 for (i=0; i<nzB; i++) { /* row < local row index */ 4643 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4644 else break; 4645 } 4646 imark = i; 4647 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4648 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4649 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4650 ierr = PetscFree(idx);CHKERRQ(ierr); 4651 *brstart = imark; 4652 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4653 } else { 4654 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4655 isrowb = *rowb; iscolb = *colb; 4656 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4657 bseq[0] = *B_seq; 4658 } 4659 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4660 *B_seq = bseq[0]; 4661 ierr = PetscFree(bseq);CHKERRQ(ierr); 4662 if (!rowb){ 4663 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4664 } else { 4665 *rowb = isrowb; 4666 } 4667 if (!colb){ 4668 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4669 } else { 4670 *colb = iscolb; 4671 } 4672 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4673 PetscFunctionReturn(0); 4674 } 4675 4676 #undef __FUNCT__ 4677 #define __FUNCT__ "MatGetBrowsOfAoCols" 4678 /*@C 4679 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 4680 of the OFF-DIAGONAL portion of the local A 4681 4682 Collective on Mat 4683 4684 Input Parameters: 4685 + A,B - the matrices in mpiaij format 4686 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4687 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4688 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4689 4690 Output Parameter: 4691 + B_oth - the sequential matrix generated 4692 4693 Level: developer 4694 4695 @*/ 4696 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4697 { 4698 VecScatter_MPI_General *gen_to,*gen_from; 4699 PetscErrorCode ierr; 4700 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4701 Mat_SeqAIJ *b_oth; 4702 VecScatter ctx=a->Mvctx; 4703 MPI_Comm comm=((PetscObject)ctx)->comm; 4704 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4705 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4706 PetscScalar *rvalues,*svalues; 4707 MatScalar *b_otha,*bufa,*bufA; 4708 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4709 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4710 MPI_Status *sstatus,rstatus; 4711 PetscMPIInt jj; 4712 PetscInt *cols,sbs,rbs; 4713 PetscScalar *vals; 4714 4715 PetscFunctionBegin; 4716 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4717 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4718 } 4719 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4720 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4721 4722 gen_to = (VecScatter_MPI_General*)ctx->todata; 4723 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4724 rvalues = gen_from->values; /* holds the length of receiving row */ 4725 svalues = gen_to->values; /* holds the length of sending row */ 4726 nrecvs = gen_from->n; 4727 nsends = gen_to->n; 4728 4729 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4730 srow = gen_to->indices; /* local row index to be sent */ 4731 sstarts = gen_to->starts; 4732 sprocs = gen_to->procs; 4733 sstatus = gen_to->sstatus; 4734 sbs = gen_to->bs; 4735 rstarts = gen_from->starts; 4736 rprocs = gen_from->procs; 4737 rbs = gen_from->bs; 4738 4739 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4740 if (scall == MAT_INITIAL_MATRIX){ 4741 /* i-array */ 4742 /*---------*/ 4743 /* post receives */ 4744 for (i=0; i<nrecvs; i++){ 4745 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4746 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4747 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4748 } 4749 4750 /* pack the outgoing message */ 4751 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4752 rstartsj = sstartsj + nsends +1; 4753 sstartsj[0] = 0; rstartsj[0] = 0; 4754 len = 0; /* total length of j or a array to be sent */ 4755 k = 0; 4756 for (i=0; i<nsends; i++){ 4757 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4758 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4759 for (j=0; j<nrows; j++) { 4760 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4761 for (l=0; l<sbs; l++){ 4762 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4763 rowlen[j*sbs+l] = ncols; 4764 len += ncols; 4765 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4766 } 4767 k++; 4768 } 4769 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4770 
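/* the row-length (i-array) message for this destination has now been posted */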
sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4771 } 4772 /* recvs and sends of i-array are completed */ 4773 i = nrecvs; 4774 while (i--) { 4775 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4776 } 4777 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4778 4779 /* allocate buffers for sending j and a arrays */ 4780 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4781 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4782 4783 /* create i-array of B_oth */ 4784 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4785 b_othi[0] = 0; 4786 len = 0; /* total length of j or a array to be received */ 4787 k = 0; 4788 for (i=0; i<nrecvs; i++){ 4789 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4790 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4791 for (j=0; j<nrows; j++) { 4792 b_othi[k+1] = b_othi[k] + rowlen[j]; 4793 len += rowlen[j]; k++; 4794 } 4795 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4796 } 4797 4798 /* allocate space for j and a arrays of B_oth */ 4799 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4800 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4801 4802 /* j-array */ 4803 /*---------*/ 4804 /* post receives of j-array */ 4805 for (i=0; i<nrecvs; i++){ 4806 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4807 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4808 } 4809 4810 /* pack the outgoing message j-array */ 4811 k = 0; 4812 for (i=0; i<nsends; i++){ 4813 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4814 bufJ = bufj+sstartsj[i]; 4815 for (j=0; j<nrows; j++) { 4816 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4817 for (ll=0; ll<sbs; ll++){ 4818 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4819 for (l=0; l<ncols; l++){ 4820 *bufJ++ = cols[l]; 4821 } 4822 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4823 } 4824 } 4825 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4826 } 4827 4828 /* recvs and sends of j-array are completed */ 4829 i = nrecvs; 4830 while (i--) { 4831 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4832 } 4833 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4834 } else if (scall == MAT_REUSE_MATRIX){ 4835 sstartsj = *startsj; 4836 rstartsj = sstartsj + nsends +1; 4837 bufa = *bufa_ptr; 4838 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4839 b_otha = b_oth->a; 4840 } else { 4841 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4842 } 4843 4844 /* a-array */ 4845 /*---------*/ 4846 /* post receives of a-array */ 4847 for (i=0; i<nrecvs; i++){ 4848 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4849 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4850 } 4851 4852 /* pack the outgoing message a-array */ 4853 k = 0; 4854 for (i=0; i<nsends; i++){ 4855 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4856 bufA = bufa+sstartsj[i]; 4857 for (j=0; j<nrows; j++) { 4858 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4859 for (ll=0; ll<sbs; ll++){ 4860 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4861 for (l=0; l<ncols; l++){ 
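/* copy the numerical values of this row into the outgoing a-array buffer */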
4862 *bufA++ = vals[l]; 4863 } 4864 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4865 } 4866 } 4867 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4868 } 4869 /* recvs and sends of a-array are completed */ 4870 i = nrecvs; 4871 while (i--) { 4872 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4873 } 4874 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4875 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4876 4877 if (scall == MAT_INITIAL_MATRIX){ 4878 /* put together the new matrix */ 4879 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4880 4881 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4882 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4883 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4884 b_oth->free_a = PETSC_TRUE; 4885 b_oth->free_ij = PETSC_TRUE; 4886 b_oth->nonew = 0; 4887 4888 ierr = PetscFree(bufj);CHKERRQ(ierr); 4889 if (!startsj || !bufa_ptr){ 4890 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4891 ierr = PetscFree(bufa);CHKERRQ(ierr); 4892 } else { 4893 *startsj = sstartsj; 4894 *bufa_ptr = bufa; 4895 } 4896 } 4897 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4898 PetscFunctionReturn(0); 4899 } 4900 4901 #undef __FUNCT__ 4902 #define __FUNCT__ "MatGetCommunicationStructs" 4903 /*@C 4904 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4905 4906 Not Collective 4907 4908 Input Parameter: 4909 . A - The matrix in mpiaij format 4910 4911 Output Parameters: 4912 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4913 . 
colmap - A map from global column index to local index into lvec 4914 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4915 4916 Level: developer 4917 4918 @*/ 4919 #if defined (PETSC_USE_CTABLE) 4920 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4921 #else 4922 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4923 #endif 4924 { 4925 Mat_MPIAIJ *a; 4926 4927 PetscFunctionBegin; 4928 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4929 PetscValidPointer(lvec, 2) 4930 PetscValidPointer(colmap, 3) 4931 PetscValidPointer(multScatter, 4) 4932 a = (Mat_MPIAIJ *) A->data; 4933 if (lvec) *lvec = a->lvec; 4934 if (colmap) *colmap = a->colmap; 4935 if (multScatter) *multScatter = a->Mvctx; 4936 PetscFunctionReturn(0); 4937 } 4938 4939 EXTERN_C_BEGIN 4940 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4941 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4942 EXTERN_C_END 4943 4944 #undef __FUNCT__ 4945 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4946 /* 4947 Computes (B'*A')' since computing B*A directly is untenable 4948 4949 n p p 4950 ( ) ( ) ( ) 4951 m ( A ) * n ( B ) = m ( C ) 4952 ( ) ( ) ( ) 4953 4954 */ 4955 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4956 { 4957 PetscErrorCode ierr; 4958 Mat At,Bt,Ct; 4959 4960 PetscFunctionBegin; 4961 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4962 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4963 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4964 ierr = MatDestroy(At);CHKERRQ(ierr); 4965 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4966 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4967 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4968 PetscFunctionReturn(0); 4969 } 4970 4971 #undef __FUNCT__ 4972 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4973 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4974 { 4975 PetscErrorCode ierr; 4976 PetscInt m=A->rmap->n,n=B->cmap->n; 4977 Mat Cmat; 4978 4979 PetscFunctionBegin; 4980 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4981 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4982 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4983 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4984 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4985 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4986 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4987 *C = Cmat; 4988 PetscFunctionReturn(0); 4989 } 4990 4991 /* ----------------------------------------------------------------*/ 4992 #undef __FUNCT__ 4993 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4994 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4995 { 4996 PetscErrorCode ierr; 4997 4998 PetscFunctionBegin; 4999 if (scall == MAT_INITIAL_MATRIX){ 5000 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5001 } 5002 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5003 PetscFunctionReturn(0); 5004 } 5005 5006 EXTERN_C_BEGIN 5007 #if defined(PETSC_HAVE_MUMPS) 5008 extern PetscErrorCode 
MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 5009 #endif 5010 #if defined(PETSC_HAVE_PASTIX) 5011 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5012 #endif 5013 #if defined(PETSC_HAVE_SUPERLU_DIST) 5014 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5015 #endif 5016 #if defined(PETSC_HAVE_SPOOLES) 5017 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5018 #endif 5019 EXTERN_C_END 5020 5021 /*MC 5022 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5023 5024 Options Database Keys: 5025 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5026 5027 Level: beginner 5028 5029 .seealso: MatCreateMPIAIJ() 5030 M*/ 5031 5032 EXTERN_C_BEGIN 5033 #undef __FUNCT__ 5034 #define __FUNCT__ "MatCreate_MPIAIJ" 5035 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5036 { 5037 Mat_MPIAIJ *b; 5038 PetscErrorCode ierr; 5039 PetscMPIInt size; 5040 5041 PetscFunctionBegin; 5042 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5043 5044 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5045 B->data = (void*)b; 5046 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5047 B->rmap->bs = 1; 5048 B->assembled = PETSC_FALSE; 5049 B->mapping = 0; 5050 5051 B->insertmode = NOT_SET_VALUES; 5052 b->size = size; 5053 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5054 5055 /* build cache for off array entries formed */ 5056 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5057 b->donotstash = PETSC_FALSE; 5058 b->colmap = 0; 5059 b->garray = 0; 5060 b->roworiented = PETSC_TRUE; 5061 5062 /* stuff used for matrix vector multiply */ 5063 b->lvec = PETSC_NULL; 5064 b->Mvctx = PETSC_NULL; 5065 5066 /* stuff for MatGetRow() */ 5067 b->rowindices = 0; 5068 b->rowvalues = 0; 5069 b->getrowactive = PETSC_FALSE; 5070 5071 #if defined(PETSC_HAVE_SPOOLES) 5072 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5073 "MatGetFactor_mpiaij_spooles", 5074 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5075 #endif 5076 #if defined(PETSC_HAVE_MUMPS) 5077 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5078 "MatGetFactor_mpiaij_mumps", 5079 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5080 #endif 5081 #if defined(PETSC_HAVE_PASTIX) 5082 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5083 "MatGetFactor_mpiaij_pastix", 5084 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5085 #endif 5086 #if defined(PETSC_HAVE_SUPERLU_DIST) 5087 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5088 "MatGetFactor_mpiaij_superlu_dist", 5089 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5090 #endif 5091 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5092 "MatStoreValues_MPIAIJ", 5093 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5094 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5095 "MatRetrieveValues_MPIAIJ", 5096 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5097 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5098 "MatGetDiagonalBlock_MPIAIJ", 5099 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5100 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5101 "MatIsTranspose_MPIAIJ", 5102 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5103 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5104 "MatMPIAIJSetPreallocation_MPIAIJ", 5105 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5106 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5107 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5108 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5109 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5110 "MatDiagonalScaleLocal_MPIAIJ", 5111 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5112 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5113 "MatConvert_MPIAIJ_MPICSRPERM", 5114 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5115 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5116 "MatConvert_MPIAIJ_MPICRL", 5117 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5118 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5119 "MatConvert_MPIAIJ_MPISBAIJ", 5120 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5121 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5122 "MatMatMult_MPIDense_MPIAIJ", 5123 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5124 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5125 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5126 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5127 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5128 "MatMatMultNumeric_MPIDense_MPIAIJ", 5129 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5130 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5131 PetscFunctionReturn(0); 5132 } 5133 EXTERN_C_END 5134 5135 #undef __FUNCT__ 5136 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5137 /*@ 5138 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" 5139 and "off-diagonal" part of the matrix in CSR format. 5140 5141 Collective on MPI_Comm 5142 5143 Input Parameters: 5144 + comm - MPI communicator 5145 . m - number of local rows (Cannot be PETSC_DECIDE) 5146 . n - number of local columns; this should be the same as the local size used in creating the 5147 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 5148 it calculated if N is given). For square matrices n is almost always m. 5149 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5150 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5151 . i - row indices for "diagonal" portion of matrix 5152 . j - column indices for "diagonal" portion (local indices, 0 to n-1) 5153 . a - matrix values 5154 . oi - row indices for "off-diagonal" portion of matrix 5155 . oj - column indices for "off-diagonal" portion (global indices) 5156 - oa - matrix values 5157 5158 Output Parameter: 5159 . mat - the matrix 5160 5161 Level: advanced 5162 5163 Notes: 5164 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5165 5166 The i and j indices are 0-based 5167 5168 See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix 5169 5170 This sets local rows and cannot be used to set off-processor values. 5171 5172 You cannot later use MatSetValues() to change values in this matrix.
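   Example (a sketch for rank 0 of a two-process run; the sizes, array contents, and variable names are illustrative only):
.vb
      PetscInt    di[3] = {0,1,2}, dj[2] = {0,1};  PetscScalar da[2] = {1.0,2.0};
      PetscInt    oi[3] = {0,1,2}, oj[2] = {2,3};  PetscScalar oa[2] = {3.0,4.0};
      Mat         A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
   Here rank 0 owns global rows and columns 0-1; the dj[] entries are local column indices of the diagonal block,
   while the oj[] entries are global column indices owned by the other process. Rank 1 would pass its own arrays.
   Because the arrays are not copied, they must remain valid for the life of the matrix.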
5173 5174 .keywords: matrix, aij, compressed row, sparse, parallel 5175 5176 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5177 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5178 @*/ 5179 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5180 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5181 { 5182 PetscErrorCode ierr; 5183 Mat_MPIAIJ *maij; 5184 5185 PetscFunctionBegin; 5186 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5187 if (i[0]) { 5188 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5189 } 5190 if (oi[0]) { 5191 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5192 } 5193 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5194 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5195 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5196 maij = (Mat_MPIAIJ*) (*mat)->data; 5197 maij->donotstash = PETSC_TRUE; 5198 (*mat)->preallocated = PETSC_TRUE; 5199 5200 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5201 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5202 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5203 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5204 5205 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5206 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5207 5208 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5209 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5210 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5211 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5212 5213 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5214 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5215 PetscFunctionReturn(0); 5216 } 5217 5218 /* 5219 Special version for direct calls from Fortran 5220 */ 5221 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5222 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5223 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5224 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5225 #endif 5226 5227 /* Change these macros so can be used in void function */ 5228 #undef CHKERRQ 5229 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5230 #undef SETERRQ2 5231 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5232 #undef SETERRQ 5233 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5234 5235 EXTERN_C_BEGIN 5236 #undef __FUNCT__ 5237 #define __FUNCT__ "matsetvaluesmpiaij_" 5238 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5239 { 5240 Mat mat = *mmat; 5241 PetscInt m = *mm, n = *mn; 5242 InsertMode addv = *maddv; 5243 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5244 PetscScalar value; 5245 PetscErrorCode ierr; 5246 5247 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5248 if (mat->insertmode == NOT_SET_VALUES) { 5249 mat->insertmode = addv; 5250 } 5251 #if defined(PETSC_USE_DEBUG) 5252 else if (mat->insertmode != addv) { 5253 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5254 } 5255 #endif 5256 { 5257 PetscInt i,j,rstart = mat->rmap->rstart,rend = 
mat->rmap->rend; 5258 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5259 PetscTruth roworiented = aij->roworiented; 5260 5261 /* Some Variables required in the macro */ 5262 Mat A = aij->A; 5263 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5264 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5265 MatScalar *aa = a->a; 5266 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5267 Mat B = aij->B; 5268 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5269 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5270 MatScalar *ba = b->a; 5271 5272 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5273 PetscInt nonew = a->nonew; 5274 MatScalar *ap1,*ap2; 5275 5276 PetscFunctionBegin; 5277 for (i=0; i<m; i++) { 5278 if (im[i] < 0) continue; 5279 #if defined(PETSC_USE_DEBUG) 5280 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5281 #endif 5282 if (im[i] >= rstart && im[i] < rend) { 5283 row = im[i] - rstart; 5284 lastcol1 = -1; 5285 rp1 = aj + ai[row]; 5286 ap1 = aa + ai[row]; 5287 rmax1 = aimax[row]; 5288 nrow1 = ailen[row]; 5289 low1 = 0; 5290 high1 = nrow1; 5291 lastcol2 = -1; 5292 rp2 = bj + bi[row]; 5293 ap2 = ba + bi[row]; 5294 rmax2 = bimax[row]; 5295 nrow2 = bilen[row]; 5296 low2 = 0; 5297 high2 = nrow2; 5298 5299 for (j=0; j<n; j++) { 5300 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5301 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5302 if (in[j] >= cstart && in[j] < cend){ 5303 col = in[j] - cstart; 5304 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5305 } else if (in[j] < 0) continue; 5306 #if defined(PETSC_USE_DEBUG) 5307 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5308 #endif 5309 else { 5310 if (mat->was_assembled) { 5311 if (!aij->colmap) { 5312 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5313 } 5314 #if defined (PETSC_USE_CTABLE) 5315 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5316 col--; 5317 #else 5318 col = aij->colmap[in[j]] - 1; 5319 #endif 5320 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5321 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5322 col = in[j]; 5323 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5324 B = aij->B; 5325 b = (Mat_SeqAIJ*)B->data; 5326 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5327 rp2 = bj + bi[row]; 5328 ap2 = ba + bi[row]; 5329 rmax2 = bimax[row]; 5330 nrow2 = bilen[row]; 5331 low2 = 0; 5332 high2 = nrow2; 5333 bm = aij->B->rmap->n; 5334 ba = b->a; 5335 } 5336 } else col = in[j]; 5337 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5338 } 5339 } 5340 } else { 5341 if (!aij->donotstash) { 5342 if (roworiented) { 5343 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5344 } else { 5345 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5346 } 5347 } 5348 } 5349 }} 5350 PetscFunctionReturnVoid(); 5351 } 5352 EXTERN_C_END 5353 5354
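
/*
   Usage sketch (illustrative only, not part of the library): the developer-level helpers in this
   file are typically combined along the following lines, assuming seqA is a sequential AIJ matrix
   assembled on each process with global column indices and m, n are the local sizes of the merged matrix.

      Mat seqA,C,Cloc; PetscInt m,n;
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ierr = MatGetLocalMat(C,MAT_INITIAL_MATRIX,&Cloc);CHKERRQ(ierr);
      ....
      ierr = MatGetLocalMat(C,MAT_REUSE_MATRIX,&Cloc);CHKERRQ(ierr);
      ierr = MatDestroy(Cloc);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);

   The first MatGetLocalMat() call builds the sequential matrix of C's local rows; the second call,
   with MAT_REUSE_MATRIX, only refreshes its numerical values.
*/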