#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
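
/*
   Usage sketch (not part of this file; a minimal illustration assuming the PETSc
   API of this release). Process 0 assembles a MATSEQAIJ matrix and the routine
   above spreads it over the communicator; a second call with MAT_REUSE_MATRIX
   refreshes only the numerical values:

     Mat      gseq = PETSC_NULL,dist;
     PetscInt m    = ...;                 // desired number of local rows here
     if (!rank) { ... assemble gseq as MATSEQAIJ ... }
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);

   gmat is dereferenced only on rank 0, so the other ranks may pass PETSC_NULL.
*/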
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col; \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1  = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i]  = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col; \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2  = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i]  = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}
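
/*
   Both macros above run the same insertion kernel on one CSR row, once for the
   diagonal block A and once for the off-diagonal block B. A standalone sketch of
   the search they perform (illustrative only; rp/ap stand for the row's column
   and value arrays, nrow for its current length):

     low = 0; high = nrow;
     while (high - low > 5) {            // coarse binary search
       t = (low + high)/2;
       if (rp[t] > col) high = t;
       else             low  = t;
     }
     for (i=low; i<high; i++) {          // finish with a short linear scan
       if (rp[i] > col) break;           // i is the insertion point
       if (rp[i] == col) {               // existing entry: add or overwrite
         ap[i] = (addv == ADD_VALUES) ? ap[i] + value : value;
         goto done;
       }
     }
     // otherwise grow the row (MatSeqXAIJReallocateAIJ) and shift entries up

   The lastcol test at the top exploits callers that pass columns in increasing
   order: the previous search range then remains a valid lower bound.
*/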
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscTruth ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
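
/*
   Routing summary for MatSetValues_MPIAIJ() above (a restatement of the code,
   not additional logic). For each (im[i],in[j],value):

     im[i] in [rstart,rend) and in[j] in [cstart,cend)  -> diagonal block aij->A
     im[i] in [rstart,rend) and in[j] elsewhere         -> aij->B, with the global
            column translated to a local one through colmap (built on demand by
            CreateColmap_MPIAIJ_Private(); a failed lookup on an assembled matrix
            triggers DisAssemble_MPIAIJ() so new off-diagonal structure can grow)
     im[i] owned by another process                     -> buffered in mat->stash

   MatGetValues_MPIAIJ() follows the same column split but, per its SETERRQ,
   serves only locally owned rows.
*/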
extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr); aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
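
/*
   The Begin/End pair above is PETSc's usual two-phase assembly: Begin() agrees on
   the insert mode and starts shipping stashed off-process entries, End() drains
   the received (row,col,value) triples back through MatSetValues_MPIAIJ() and
   then assembles the A and B blocks. Caller-side view (standard API, shown only
   for orientation):

     ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);  // row may be off-process
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);       // stash scatter begins
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);         // stash drained, A and B assembled
*/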
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
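
/*
   All the products above exploit the storage split

     y = A_d * x_owned + B_o * x_ghost

   where A_d (a->A) carries the locally owned columns and B_o (a->B) the rest,
   with ghost values gathered into a->lvec by the VecScatter a->Mvctx.
   MatMult_MPIAIJ() starts the scatter, multiplies by A_d while messages are in
   flight, then finishes the scatter and adds the B_o contribution; the transpose
   variants run the same scatter in SCATTER_REVERSE with ADD_VALUES so that
   remote contributions are summed into the result.
*/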
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
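
/*
   MatIsTranspose_MPIAIJ() above is staged from cheap to expensive: first each
   process compares its diagonal blocks, and only if every one passes does it
   extract the complementary off-diagonal pieces. On a process owning rows
   [first,last) of an M x N matrix it builds

     Me    = { first, first+1, ..., last-1 }            (stride IS)
     Notme = { 0, ..., first-1 } U { last, ..., M-1 }   (general IS)

   and then checks via MatGetSubMatrices() that Amat(Me,Notme) is the transpose
   of Bmat(Notme,Me).
*/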
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)  {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
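
/*
   Layout of the binary file written above, as implied by the sequence of
   PetscBinaryWrite() calls (rank 0 interleaves its own data with data received
   from ranks 1..size-1 in rank order):

     header[4]        MAT_FILE_COOKIE, M, N, global nz
     row lengths      M PetscInt values, one per global row
     column indices   nz PetscInt values, rows concatenated, each row sorted
                      (B entries left of the diagonal block, then A, then the rest of B)
     values           nz PetscScalar values in the identical order

   This is the row-streamable format that the corresponding MatLoad routines read.
*/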
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject)mat)->prefix,"-mat_ascii_output_large",&flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
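
/*
   For ASCII and draw viewing with size > 1, the routine above funnels the whole
   matrix to one process: a temporary MATMPIAIJ is created whose M rows all live
   on rank 0, every process re-inserts its A part (columns shifted by cstart) and
   its B part (columns mapped through garray) with global indices, and after
   assembly rank 0 views the gathered sequential block through the singleton
   viewer. This is inherently non-scalable, hence the 1024-row guard and the
   -mat_ascii_output_large override.
*/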
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscTruth     hasop;

  PetscFunctionBegin;
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->relax)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,-1.0,bb);CHKERRQ(ierr);
    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

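/*
   Each distributed sweep in MatRelax_MPIAIJ() above is reduced to a sequential
   one on the diagonal block: with the A_d/B_o split, the right-hand side is
   updated as

     bb1 = bb - B_o * x_ghost

   (implemented by scaling lvec by -1 and calling B->ops->multadd), after which
   one local sweep on A_d with right-hand side bb1 is exactly a block Jacobi /
   local SOR step. The SOR_EISENSTAT branch wraps a diagonal (or diagonal-block)
   multiply between a backward and a forward half-sweep.
*/
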
1371 #undef __FUNCT__ 1372 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1373 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1374 { 1375 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1376 Mat A = mat->A,B = mat->B; 1377 PetscErrorCode ierr; 1378 PetscReal isend[5],irecv[5]; 1379 1380 PetscFunctionBegin; 1381 info->block_size = 1.0; 1382 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1383 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1384 isend[3] = info->memory; isend[4] = info->mallocs; 1385 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1386 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1387 isend[3] += info->memory; isend[4] += info->mallocs; 1388 if (flag == MAT_LOCAL) { 1389 info->nz_used = isend[0]; 1390 info->nz_allocated = isend[1]; 1391 info->nz_unneeded = isend[2]; 1392 info->memory = isend[3]; 1393 info->mallocs = isend[4]; 1394 } else if (flag == MAT_GLOBAL_MAX) { 1395 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1396 info->nz_used = irecv[0]; 1397 info->nz_allocated = irecv[1]; 1398 info->nz_unneeded = irecv[2]; 1399 info->memory = irecv[3]; 1400 info->mallocs = irecv[4]; 1401 } else if (flag == MAT_GLOBAL_SUM) { 1402 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1403 info->nz_used = irecv[0]; 1404 info->nz_allocated = irecv[1]; 1405 info->nz_unneeded = irecv[2]; 1406 info->memory = irecv[3]; 1407 info->mallocs = irecv[4]; 1408 } 1409 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1410 info->fill_ratio_needed = 0; 1411 info->factor_mallocs = 0; 1412 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatSetOption_MPIAIJ" 1418 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1419 { 1420 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1421 PetscErrorCode ierr; 1422 1423 PetscFunctionBegin; 1424 switch (op) { 1425 case MAT_NEW_NONZERO_LOCATIONS: 1426 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1427 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1428 case MAT_KEEP_NONZERO_PATTERN: 1429 case MAT_NEW_NONZERO_LOCATION_ERR: 1430 case MAT_USE_INODES: 1431 case MAT_IGNORE_ZERO_ENTRIES: 1432 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1433 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1434 break; 1435 case MAT_ROW_ORIENTED: 1436 a->roworiented = flg; 1437 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1438 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1439 break; 1440 case MAT_NEW_DIAGONALS: 1441 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1442 break; 1443 case MAT_IGNORE_OFF_PROC_ENTRIES: 1444 a->donotstash = PETSC_TRUE; 1445 break; 1446 case MAT_SYMMETRIC: 1447 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1448 break; 1449 case MAT_STRUCTURALLY_SYMMETRIC: 1450 case MAT_HERMITIAN: 1451 case MAT_SYMMETRY_ETERNAL: 1452 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1453 break; 1454 default: 1455 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1456 } 1457 PetscFunctionReturn(0); 1458 } 1459 1460 #undef __FUNCT__ 1461 #define __FUNCT__ "MatGetRow_MPIAIJ" 1462 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1463 { 1464 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1465 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1466 PetscErrorCode ierr; 1467 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1468 PetscInt 

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
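
/*
   Illustrative sketch (not part of the original source): the usual access
   pattern for MatGetRow()/MatRestoreRow().  Only locally owned rows may be
   requested, and only one row may be held at a time (enforced by the
   getrowactive flag above).
*/
#if 0
static PetscErrorCode ExampleWalkRows(Mat A)
{
  PetscInt          row,rstart,rend,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscErrorCode    ierr;

  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ... inspect ncols, cols[], vals[] ... */
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  return 0;
}
#endif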

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
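
/*
   Illustrative sketch (not part of the original source): the norms supported
   by MatNorm_MPIAIJ() above.  NORM_2 is rejected with PETSC_ERR_SUP, so
   parallel callers must stick to NORM_1, NORM_FROBENIUS, or NORM_INFINITY.
*/
#if 0
static PetscErrorCode ExampleNorms(Mat A)
{
  PetscReal      n1,nf,ninf;
  PetscErrorCode ierr;

  ierr = MatNorm(A,NORM_1,&n1);CHKERRQ(ierr);          /* max column sum */
  ierr = MatNorm(A,NORM_FROBENIUS,&nf);CHKERRQ(ierr);  /* sqrt of global sum of squares */
  ierr = MatNorm(A,NORM_INFINITY,&ninf);CHKERRQ(ierr); /* max row sum */
  return 0;
}
#endif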

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
  PetscInt       cstart=A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++){
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */

  /* copy over the B part */
  ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
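
/*
   Illustrative sketch (not part of the original source): computing L*A*R with
   MatDiagonalScale().  The left vector must match the row layout and the
   right vector the column layout, as checked above.
*/
#if 0
static PetscErrorCode ExampleScale(Mat A)
{
  Vec            l,r;
  PetscErrorCode ierr;

  ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr); /* r: column layout, l: row layout */
  ierr = VecSet(l,2.0);CHKERRQ(ierr);
  ierr = VecSet(r,0.5);CHKERRQ(ierr);
  ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr); /* overlaps the scatter of r with scaling of the local blocks */
  ierr = VecDestroy(l);CHKERRQ(ierr);
  ierr = VecDestroy(r);CHKERRQ(ierr);
  return 0;
}
#endif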

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we could provide a copy more
       efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x   = (Mat_SeqAIJ *)xx->A->data;
    y   = (Mat_SeqAIJ *)yy->A->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x   = (Mat_SeqAIJ *)xx->B->data;
    y   = (Mat_SeqAIJ *)yy->B->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
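
/*
   Illustrative sketch (not part of the original source): the three code paths
   of MatAXPY_MPIAIJ() above are selected by the MatStructure argument.
*/
#if 0
static PetscErrorCode ExampleAXPY(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  /* fastest path: one BLAS axpy per block, valid only when X and Y have identical patterns */
  ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  /* X's pattern contained in Y's: uses the cached xtoy index mapping */
  ierr = MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);
  /* general fallback: goes through MatAXPY_Basic() */
  ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
  return 0;
}
#endif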

EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscTruth     row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type  *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
{
  namespace graph_dist = boost::graph::distributed;

  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type    *lgraph_p;
  PetscContainer c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
  ierr = VecCopy(b, x);CHKERRQ(ierr);

  PetscScalar* array_x;
  ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
  PetscInt sx;
  ierr = VecGetSize(x, &sx);CHKERRQ(ierr);

  PetscScalar* array_b;
  ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
  PetscInt sb;
  ierr = VecGetSize(b, &sb);CHKERRQ(ierr);

  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
  array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
                 ref_x(array_x, boost::extents[num_vertices(graph)]);

  typedef boost::iterator_property_map<array_ref_type::iterator,
            boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
  gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
               vector_x(ref_x.begin(), get(boost::vertex_index, graph));

  ilu_set_solve(*lgraph_p, vector_b, vector_x);

  PetscFunctionReturn(0);
}
#endif

typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;
  PetscMPIInt    *send_rank;
  PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
  PetscScalar    *sbuf_a,**rbuf_a;
  PetscErrorCode (*MatDestroy)(Mat);
} Mat_Redundant;

#undef __FUNCT__
#define __FUNCT__ "PetscContainerDestroy_MatRedundant"
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund=(Mat_Redundant*)ptr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
  for (i=0; i<redund->nrecvs; i++){
    ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
    ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
  ierr = PetscFree(redund);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
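
/*
   Illustrative sketch (not part of the original source): the compose/query
   idiom used by the Mat_Redundant code below to hang a private struct off a
   Mat, so it survives until the matrix is destroyed.
*/
#if 0
static PetscErrorCode ExampleAttach(Mat C,Mat_Redundant *redund)
{
  PetscContainer container;
  PetscErrorCode ierr;

  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
  ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
  return 0;
}
#endif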

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatRedundant"
PetscErrorCode MatDestroy_MatRedundant(Mat A)
{
  PetscErrorCode ierr;
  PetscContainer container;
  Mat_Redundant  *redund=PETSC_NULL;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
  }
  A->ops->destroy = redund->MatDestroy;
  ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
Wrong nzlocal"); 2117 2118 nsends = redund->nsends; 2119 nrecvs = redund->nrecvs; 2120 send_rank = redund->send_rank; recv_rank = send_rank + size; 2121 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2122 sbuf_j = redund->sbuf_j; 2123 sbuf_a = redund->sbuf_a; 2124 rbuf_j = redund->rbuf_j; 2125 rbuf_a = redund->rbuf_a; 2126 } 2127 2128 if (reuse == MAT_INITIAL_MATRIX){ 2129 PetscMPIInt subrank,subsize; 2130 PetscInt nleftover,np_subcomm; 2131 /* get the destination processors' id send_rank, nsends and nrecvs */ 2132 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2133 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2134 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2135 recv_rank = send_rank + size; 2136 np_subcomm = size/nsubcomm; 2137 nleftover = size - nsubcomm*np_subcomm; 2138 nsends = 0; nrecvs = 0; 2139 for (i=0; i<size; i++){ /* i=rank*/ 2140 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2141 send_rank[nsends] = i; nsends++; 2142 recv_rank[nrecvs++] = i; 2143 } 2144 } 2145 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2146 i = size-nleftover-1; 2147 j = 0; 2148 while (j < nsubcomm - nleftover){ 2149 send_rank[nsends++] = i; 2150 i--; j++; 2151 } 2152 } 2153 2154 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2155 for (i=0; i<nleftover; i++){ 2156 recv_rank[nrecvs++] = size-nleftover+i; 2157 } 2158 } 2159 2160 /* allocate sbuf_j, sbuf_a */ 2161 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2162 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2163 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2164 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2165 2166 /* copy mat's local entries into the buffers */ 2167 if (reuse == MAT_INITIAL_MATRIX){ 2168 rownz_max = 0; 2169 rptr = sbuf_j; 2170 cols = sbuf_j + rend-rstart + 1; 2171 vals = sbuf_a; 2172 rptr[0] = 0; 2173 for (i=0; i<rend-rstart; i++){ 2174 row = i + rstart; 2175 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2176 ncols = nzA + nzB; 2177 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2178 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2179 /* load the column indices for this row into cols */ 2180 lwrite = 0; 2181 for (l=0; l<nzB; l++) { 2182 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2183 vals[lwrite] = aworkB[l]; 2184 cols[lwrite++] = ctmp; 2185 } 2186 } 2187 for (l=0; l<nzA; l++){ 2188 vals[lwrite] = aworkA[l]; 2189 cols[lwrite++] = cstart + cworkA[l]; 2190 } 2191 for (l=0; l<nzB; l++) { 2192 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2193 vals[lwrite] = aworkB[l]; 2194 cols[lwrite++] = ctmp; 2195 } 2196 } 2197 vals += ncols; 2198 cols += ncols; 2199 rptr[i+1] = rptr[i] + ncols; 2200 if (rownz_max < ncols) rownz_max = ncols; 2201 } 2202 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2203 } else { /* only copy matrix values into sbuf_a */ 2204 rptr = sbuf_j; 2205 vals = sbuf_a; 2206 rptr[0] = 0; 2207 for (i=0; i<rend-rstart; i++){ 2208 row = i + rstart; 2209 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2210 ncols = nzA + nzB; 2211 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2212 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2213 lwrite = 0; 2214 for (l=0; l<nzB; l++) { 2215 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2216 } 2217 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2218 for (l=0; l<nzB; l++) { 
        if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
      }
      vals += ncols;
      rptr[i+1] = rptr[i] + ncols;
    }
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */

  /* send nzlocal to others, and recv other's nzlocal */
  /*--------------------------------------------------*/
  if (reuse == MAT_INITIAL_MATRIX){
    ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    s_waits2 = s_waits3 + nsends;
    s_waits1 = s_waits2 + nsends;
    r_waits1 = s_waits1 + nsends;
    r_waits2 = r_waits1 + nrecvs;
    r_waits3 = r_waits2 + nrecvs;
  } else {
    ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    r_waits3 = s_waits3 + nsends;
  }

  ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX){
    /* get new tags to keep the communication clean */
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
    ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
    rbuf_nz = sbuf_nz + nsends;

    /* post receives of other's nzlocal */
    for (i=0; i<nrecvs; i++){
      ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
    }
    /* send nzlocal to others */
    for (i=0; i<nsends; i++){
      sbuf_nz[i] = nzlocal;
      ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
    }
    /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
      recv_rank[imdex] = recv_status.MPI_SOURCE;
      /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
      ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);

      i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
      rbuf_nz[imdex] += i + 2;
      ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
      ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
      count--;
    }
    /* wait on sends of nzlocal */
    if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
    /* send mat->i,j to others, and recv from other's */
    /*------------------------------------------------*/
    for (i=0; i<nsends; i++){
      j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
      ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
    }
    /* wait on receives of mat->i,j */
    /*------------------------------*/
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
      if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
      count--;
    }
    /* wait on sends of mat->i,j */
    /*---------------------------*/
    if (nsends) {
      ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
    }
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */

  /* post receives, send and receive mat->a */
  /*----------------------------------------*/
  for (imdex=0; imdex<nrecvs; imdex++) {
    ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
  }
  for (i=0; i<nsends; i++){
    ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
  }
  count = nrecvs;
  while (count) {
    ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
    if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
    count--;
  }
  if (nsends) {
    ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
  }

  ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);

  /* create redundant matrix */
  /*-------------------------*/
  if (reuse == MAT_INITIAL_MATRIX){
    /* compute rownz_max for preallocation */
    for (imdex=0; imdex<nrecvs; imdex++){
      j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
      rptr = rbuf_j[imdex];
      for (i=0; i<j; i++){
        ncols = rptr[i+1] - rptr[i];
        if (rownz_max < ncols) rownz_max = ncols;
      }
    }

    ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
    ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetFromOptions(C);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
  } else {
    C = *matredundant;
  }

  /* insert local matrix entries */
  rptr = sbuf_j;
  cols = sbuf_j + rend-rstart + 1;
  vals = sbuf_a;
  for (i=0; i<rend-rstart; i++){
    row = i + rstart;
    ncols = rptr[i+1] - rptr[i];
    ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    vals += ncols;
    cols += ncols;
  }
  /* insert received matrix entries */
  for (imdex=0; imdex<nrecvs; imdex++){
    rstart = rowrange[recv_rank[imdex]];
    rend   = rowrange[recv_rank[imdex]+1];
    rptr   = rbuf_j[imdex];
    cols   = rbuf_j[imdex] + rend-rstart + 1;
    vals   = rbuf_a[imdex];
    for (i=0; i<rend-rstart; i++){
      row = i + rstart;
      ncols = rptr[i+1] - rptr[i];
      ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      vals += ncols;
      cols += ncols;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
  if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
  if (reuse == MAT_INITIAL_MATRIX){
    PetscContainer container;
    *matredundant = C;
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
    ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
    ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);

    redund->nzlocal   = nzlocal;
    redund->nsends    = nsends;
    redund->nrecvs    = nrecvs;
    redund->send_rank = send_rank;
    redund->sbuf_nz   = sbuf_nz;
    redund->sbuf_j    = sbuf_j;
    redund->sbuf_a    = sbuf_a;
    redund->rbuf_j    = rbuf_j;
    redund->rbuf_a    = rbuf_a;

    redund->MatDestroy = C->ops->destroy;
    C->ops->destroy    = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}
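
/*
   Illustrative sketch (not part of the original source): creating nsubcomm
   redundant copies of a parallel matrix, then refreshing the values on a
   later call with MAT_REUSE_MATRIX so the buffers cached above are reused.
   The subcommunicator is assumed to have been split off beforehand.
*/
#if 0
static PetscErrorCode ExampleRedundant(Mat A,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal)
{
  Mat            Ared;
  PetscErrorCode ierr;

  ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal,MAT_INITIAL_MATRIX,&Ared);CHKERRQ(ierr);
  /* ... A's values change ... */
  ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal,MAT_REUSE_MATRIX,&Ared);CHKERRQ(ierr);
  ierr = MatDestroy(Ared);CHKERRQ(ierr); /* goes through MatDestroy_MatRedundant() */
  return 0;
}
#endif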

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {
    ierr = PetscFree(idxb);CHKERRQ(ierr);
  }
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) { /* v has one entry per local row, so loop over the row layout */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {
    ierr = PetscFree(idxb);CHKERRQ(ierr);
  }
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
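
/*
   Illustrative sketch (not part of the original source): gathering each local
   row's largest entry in magnitude together with its global column index.
*/
#if 0
static PetscErrorCode ExampleRowMax(Mat A)
{
  Vec            v;
  PetscInt       m,*idx;
  PetscErrorCode ierr;

  ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetVecs(A,PETSC_NULL,&v);CHKERRQ(ierr); /* vector with the row layout */
  ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
  /* ... use v and idx ... */
  ierr = PetscFree(idx);CHKERRQ(ierr);
  ierr = VecDestroy(v);CHKERRQ(ierr);
  return 0;
}
#endif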

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);    /* work vectors are sequential, so use a size-one communicator */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(diagV);CHKERRQ(ierr);
  ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(diagV);CHKERRQ(ierr);
  ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*24*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
       0,
/*34*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*44*/ MatGetRowMax_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*49*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*54*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*59*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPIAIJ,
       MatGetRowMinAbs_MPIAIJ,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ MatFDColoringApply_AIJ,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
/*83*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*89*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*94*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*99*/ 0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*104*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*109*/0,
       MatGetRedundantMatrix_MPIAIJ,
       MatGetRowMin_MPIAIJ,
       0,
       0,
/*114*/MatGetSeqNonzerostructure_MPIAIJ,
       0,
       0,
       0,
       0,
       0
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
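
/*
   Illustrative sketch (not part of the original source): the store/retrieve
   pair requires a frozen nonzero pattern; both calls above simply forward to
   the sequential diagonal and off-diagonal blocks.
*/
#if 0
static PetscErrorCode ExampleStoreRetrieve(Mat A)
{
  PetscErrorCode ierr;

  ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatStoreValues(A);CHKERRQ(ierr);
  /* ... modify values, e.g. apply boundary conditions ... */
  ierr = MatRetrieveValues(A);CHKERRQ(ierr); /* restores the stashed values */
  return 0;
}
#endif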
SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2758 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2759 2760 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2761 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2762 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2763 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2764 if (d_nnz) { 2765 for (i=0; i<B->rmap->n; i++) { 2766 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2767 } 2768 } 2769 if (o_nnz) { 2770 for (i=0; i<B->rmap->n; i++) { 2771 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2772 } 2773 } 2774 b = (Mat_MPIAIJ*)B->data; 2775 2776 if (!B->preallocated) { 2777 /* Explicitly create 2 MATSEQAIJ matrices. */ 2778 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2779 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2780 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2781 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2782 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2783 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2784 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2785 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2786 } 2787 2788 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2789 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2790 B->preallocated = PETSC_TRUE; 2791 PetscFunctionReturn(0); 2792 } 2793 EXTERN_C_END 2794 2795 #undef __FUNCT__ 2796 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2797 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2798 { 2799 Mat mat; 2800 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2801 PetscErrorCode ierr; 2802 2803 PetscFunctionBegin; 2804 *newmat = 0; 2805 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2806 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2807 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2808 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2809 a = (Mat_MPIAIJ*)mat->data; 2810 2811 mat->factor = matin->factor; 2812 mat->rmap->bs = matin->rmap->bs; 2813 mat->assembled = PETSC_TRUE; 2814 mat->insertmode = NOT_SET_VALUES; 2815 mat->preallocated = PETSC_TRUE; 2816 2817 a->size = oldmat->size; 2818 a->rank = oldmat->rank; 2819 a->donotstash = oldmat->donotstash; 2820 a->roworiented = oldmat->roworiented; 2821 a->rowindices = 0; 2822 a->rowvalues = 0; 2823 a->getrowactive = PETSC_FALSE; 2824 2825 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2826 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2827 2828 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2829 if (oldmat->colmap) { 2830 #if defined (PETSC_USE_CTABLE) 2831 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2832 #else 2833 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2834 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2835 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2836 #endif 2837 } else a->colmap = 0; 2838 if (oldmat->garray) { 2839 PetscInt len; 2840 len = 

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a = (Mat_MPIAIJ*)mat->data;

  mat->factor       = matin->factor;
  mat->rmap->bs     = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);

  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
      ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    mpicnt = PetscMPIIntCast(m);
    ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      mpicnt = PetscMPIIntCast(nz);
      ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    mpicnt = PetscMPIIntCast(nz);
    ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
    if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
    else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
    else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n = N/size + ((N % size) > rank);
    ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      mpicnt = PetscMPIIntCast(nz);
      ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    mpicnt = PetscMPIIntCast(nz);
    ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
    if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
    else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
    else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
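
/*
   Illustrative sketch (not part of the original source): extracting a
   parallel submatrix.  On a second extraction with the same index sets, the
   cached "ISAllGather"/"SubMatrix" objects composed above are reused.
*/
#if 0
static PetscErrorCode ExampleSubMatrix(Mat A,IS isrow,IS iscol)
{
  Mat            B;
  PetscErrorCode ierr;

  ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
  /* ... A's values change, same pattern ... */
  ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  return 0;
}
#endif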
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ on each
  process and then the final result by concatenating the local matrices. Writing
  it directly would be much like MatGetSubMatrices_MPIAIJ().

    Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save the submatrix used on this process for the next reuse request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + m;

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
    for (j=1; j<nnz; j++) {
      if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has an unsorted column index at position %D",i,j);
    }
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    for (j=0; j<nnz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    for (; j<nnz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the input data expected is
     as shown:

.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
.ve

       The column indices for each row MUST be sorted.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
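/*
   A sketch of the manual-page example above as code, assuming a run on exactly
   2 processes; all array contents come from the 3x3 matrix in the notes:
*/
#if 0
  Mat               B;
  PetscMPIInt       rank;
  const PetscInt    i0[] = {0,1,3},j0[] = {0,0,2}; /* rank 0 owns rows 0-1 */
  const PetscScalar v0[] = {1.0,2.0,3.0};
  const PetscInt    i1[] = {0,3},j1[] = {0,1,2};   /* rank 1 owns row 2 */
  const PetscScalar v1[] = {4.0,5.0,6.0};
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
  if (!rank) {ierr = MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);CHKERRQ(ierr);}
  else       {ierr = MatMPIAIJSetPreallocationCSR(B,i1,j1,v1);CHKERRQ(ierr);}
#endif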
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero. See the users manual for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
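/*
   A sketch of exact preallocation for the 8x8 example above, showing only the
   proc0 branch of a 3-process run (the other ranks would pass their own
   d_nnz/o_nnz arrays from the manual page):
*/
#if 0
  Mat      A;
  PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2}; /* per-row counts for proc0 */
  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  /* ... MatSetValues() ... MatAssemblyBegin()/MatAssemblyEnd() ... */
#endif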
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
   the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the input data expected is
     as shown:

.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
.ve

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
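/*
   The same 3x3 example as above, built in a single call on an assumed
   2-process run; the CSR arrays are exactly those listed in the notes:
*/
#if 0
  Mat               A;
  PetscMPIInt       rank;
  const PetscInt    i0[] = {0,1,3},j0[] = {0,0,2};
  const PetscScalar a0[] = {1.0,2.0,3.0};
  const PetscInt    i1[] = {0,3},j1[] = {0,1,2};
  const PetscScalar a1[] = {4.0,5.0,6.0};
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
  if (!rank) {ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i0,j0,a0,&A);CHKERRQ(ierr);}
  else       {ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,i1,j1,a1,&A);CHKERRQ(ierr);}
#endif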
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows m and columns n owned by
   that processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0. Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
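/*
   A sketch of the direct call for proc0 of the 8x8 example above; the
   recommended MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() path is
   shown in the notes:
*/
#if 0
  Mat      A;
  PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
  ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
  /* ... set values and assemble ... */
  ierr = MatDestroy(A);CHKERRQ(ierr);
#endif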
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}
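/*
   A sketch of inspecting the two local blocks via MatMPIAIJGetSeqAIJ()
   (defined above); A is assumed to be an assembled MPIAIJ matrix:
*/
#if 0
  Mat      Ad,Ao;
  PetscInt *colmap;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
  /* local column j of Ao corresponds to global column colmap[j] */
#endif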
#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating the sequential
      matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix on each processor
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix on EACH processor MUST be the same.
          This routine destroys inmat once it has been merged into outmat.

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    ierr = MPI_Scan(&m,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return an MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX) {
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
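/*
   A usage sketch for MatMerge() above: each process builds a small SeqAIJ
   piece (here a single row of a 4-column matrix, purely illustrative) and
   the pieces are stacked by rank; note that MatMerge() destroys the input:
*/
#if 0
  Mat         piece,merged;
  PetscInt    row = 0,col = 0;
  PetscScalar one = 1.0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,1,4,1,PETSC_NULL,&piece);CHKERRQ(ierr);
  ierr = MatSetValues(piece,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(piece,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(piece,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatMerge(PETSC_COMM_WORLD,piece,PETSC_DECIDE,MAT_INITIAL_MATRIX,&merged);CHKERRQ(ierr);
#endif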
#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);

    ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
    /* free the merge struct only when it was actually attached; freeing it
       unconditionally would free an uninitialized pointer */
    ierr = PetscFree(merge);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "../src/mat/utils/freespace.h"
#include "petscbt.h"
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
/*@C
      MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding together the sequential
      matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix on each processor
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
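/*
   A sketch of retrieving the support structure referred to in the note above,
   using the same query key the implementation composes onto mpimat:
*/
#if 0
  PetscContainer      container;
  Mat_Merge_SeqsToMPI *merge = PETSC_NULL;
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
  }
#endif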
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm=((PetscObject)mpimat)->comm;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat does not have its supporting struct; call MatMerge_SeqsToMPISymbolic() first");
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap.range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*-----------------------------*/
  ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap.n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj + bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*-------------------------*/
  ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
  merge->rowmap.n  = m;
  merge->rowmap.N  = M;
  merge->rowmap.bs = 1;
  ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap.n;
  M      = merge->rowmap.N;
  owners = merge->rowmap.range;

  /* determine the number of messages to send, and their lengths */
  /*--------------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*---------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*-------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s; /* points to the beginning of the k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (local to the receiver)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++) {
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);
  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
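/*
   A sketch of the symbolic/numeric split above: the initial call builds the
   parallel structure, later calls with MAT_REUSE_MATRIX only move new
   numerical values (seqmat is assumed to keep the same nonzero pattern):
*/
#if 0
  Mat mpimat;
  ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,m,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
  /* ... refill seqmat with new values ... */
  ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,m,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
#endif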
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@
     MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of the parallel matrix

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A, columns left of the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k] = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A, columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k] = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags the matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change the flags so they are freed as necessary. */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A, columns left of the diagonal block */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A, columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
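/*
   A usage sketch for MatGetLocalMat() above; A is assumed to be an MPIAIJ
   matrix whose nonzero pattern does not change between the two calls:
*/
#if 0
  Mat A_loc;
  ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
  /* ... A receives new values, same pattern ... */
  ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
  ierr = MatDestroy(A_loc);CHKERRQ(ierr);
#endif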
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
.    brstart - index of the first row of B_seq that is taken from B's own local rows (the following B->rmap->n rows of B_seq are B's local rows)
-    B_seq - the sequential matrix generated

    Level: developer

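   Example usage (a sketch; A and B are assembled MATMPIAIJ matrices with compatible
   layouts; rowb, colb and B_seq are kept for a later MAT_REUSE_MATRIX call):
.vb
     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     Mat      B_seq;
     PetscInt brstart;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve
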
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;    /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];  /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
     of the OFF-DIAGONAL portion of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.    startsj - starting points in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
-    bufa_ptr - buffer for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)

   Output Parameter:
.    B_oth - the sequential matrix generated

    Level: developer

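   Example usage (a sketch; startsj and bufa are saved so that a second call with
   MAT_REUSE_MATRIX only re-communicates the numerical values):
.vb
     Mat       B_oth;
     PetscInt  *startsj = PETSC_NULL;
     MatScalar *bufa    = PETSC_NULL;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
.ve
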
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=((PetscObject)ctx)->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the lengths of the receiving rows */
  svalues  = gen_to->values;   /* holds the lengths of the sending rows */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;   /* local row indices to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
    rstartsj    = sstartsj + nsends + 1;
    sstartsj[0] = 0; rstartsj[0] = 0;
    len = 0; /* total length of the j or a array to be sent */
    k   = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++){
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
          rowlen[j*sbs+l] = ncols;
          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len; /* starting point of the (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of the j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of the (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = sstartsj + nsends + 1;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }
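
  /* Note: on a MAT_REUSE_MATRIX call only the numerical values (the a-array) are
     communicated below; the i- and j-structure of B_oth is kept from the
     MAT_INITIAL_MATRIX call through the saved startsj and bufa_ptr arrays. */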

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      /* the send buffers are not returned to the caller, so free them here */
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
    MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

    Not Collective

   Input Parameter:
.    A - The matrix in mpiaij format

   Output Parameters:
+    lvec - The local vector holding off-process values from the argument to a matrix-vector product
.    colmap - A map from global column index to local index into lvec
-    multScatter - A scatter from the argument of a matrix-vector product to lvec

    Level: developer

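   Example usage (a sketch; assumes A is an assembled MATMPIAIJ matrix; the type
   of colmap depends on whether PETSc was configured with PETSC_USE_CTABLE):
.vb
     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
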
@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#include "../src/mat/impls/dense/mpi/mpidense.h"

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing A*B directly is untenable

             n     p            p
           (   )  (   )       (   )
       m   ( A )* n( B )  = m ( C )
           (   )  (   )       (   )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
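
/*
   The routines above are not called directly by users; they are composed onto
   MPIAIJ matrices in MatCreate_MPIAIJ() below and dispatched through MatMatMult().
   A typical call is (a sketch, assuming A is MATMPIDENSE, B is MATMPIAIJ and
   A->cmap->n == B->rmap->n):

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/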

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

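  Example (a sketch; the sizes and preallocation figures are illustrative):
.vb
     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
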
.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off-processor entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
                                     "MatGetFactor_mpiaij_spooles",
                                     MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
                                     "MatGetFactor_mpiaij_mumps",
                                     MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
                                     "MatGetFactor_mpiaij_pastix",
                                     MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
                                     "MatGetFactor_mpiaij_superlu_dist",
                                     MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                     "MatConvert_MPIAIJ_MPICSRPERM",
                                     MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                     "MatConvert_MPIAIJ_MPICRL",
                                     MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                     "MatMatMult_MPIDense_MPIAIJ",
                                     MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                     "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                     MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                     "MatMatMultNumeric_MPIDense_MPIAIJ",
                                     MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices (CSR row pointers) for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices (CSR row pointers) for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       You cannot later use MatSetValues() to change values in this matrix.
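
   Example usage (a schematic sketch; each process supplies its m local rows, with
   i,j,a describing the "diagonal" block and oi,oj,oa the "off-diagonal" block):
.vb
     Mat mat;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve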

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat = *mmat;
  PetscInt       m = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  ierr = MatPreallocated(mat);CHKERRQ(ierr);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
    PetscInt   i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscTruth roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba = b->a;

    PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar  *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend){
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined (PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else {
        if (!aij->donotstash) {
          if (roworiented) {
            ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          } else {
            ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END