#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

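/*
   Illustrative sketch (not part of the library) of how MatDistribute_MPIAIJ() might be
   called; the names gseq, N, and the even split m = N/size are assumptions made for
   this example. gmat is only examined on process 0, and the local sizes m must sum to
   the global size N.

      Mat      gseq = PETSC_NULL,dist;
      PetscInt N = 128,m = N/size;
      if (!rank) { ... create and assemble gseq as an N x N MATSEQAIJ ... }
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);

   and, once the nonzero pattern exists, the values can be refreshed in place with

      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/
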
/*
    Local utility routine that creates a mapping from the global column
    number to the local number in the off-diagonal part of the local
    storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable at
    a slightly higher hash table cost; without it, it is not scalable (each process
    has an order-N integer array, but access is fast).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

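/*
   Worked example of the mapping just built (illustrative): if the off-diagonal block
   has garray[] = {3,7,12}, then colmap records 3 -> 1, 7 -> 2, 12 -> 3 in shifted,
   1-based form, so a caller recovers the local column of global column g as
   colmap[g] - 1 (or via PetscTableFind() on g+1 when PETSC_USE_CTABLE is defined);
   a result below zero means g does not occur in this process's off-diagonal part.
*/
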
#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col; \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1  = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i]  = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col; \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2  = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i]  = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}

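/*
   The two macros above share one pattern: a bounded binary search over a row's sorted
   column indices narrows the window, a short linear scan either updates an existing
   entry in place or finds the insertion point, and an insertion shifts the tail of
   the row up one slot. A standalone sketch of that pattern (hypothetical helper, not
   used by the library; assumes the row arrays are sorted and have spare room):

      static void InsertSorted(PetscInt *cols,MatScalar *vals,PetscInt *nrow,PetscInt col,MatScalar value)
      {
        PetscInt lo = 0,hi = *nrow,t,i;
        while (hi - lo > 5) {
          t = (lo + hi)/2;
          if (cols[t] > col) hi = t; else lo = t;
        }
        for (i=lo; i<hi; i++) {
          if (cols[i] == col) {vals[i] = value; return;}
          if (cols[i] > col) break;
        }
        for (t=*nrow; t>i; t--) {cols[t] = cols[t-1]; vals[t] = vals[t-1];}
        cols[i] = col; vals[i] = value; (*nrow)++;
      }
*/
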
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscTruth ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

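/*
   Illustrative usage of the path above (not library code): entries whose row is owned
   locally go straight into the diagonal (A) or off-diagonal (B) block through the
   macros; entries for rows owned elsewhere are stashed and only communicated during
   assembly. The row and column numbers below are made up for the example.

      PetscInt    row = 7,cols[2] = {7,42};
      PetscScalar vals[2] = {1.0,2.0};
      ierr = MatSetValues(A,1,&row,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
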
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

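/*
   Illustrative usage (not library code): as enforced above, MatGetValues() on an
   MPIAIJ matrix can only retrieve entries from locally owned rows, so callers
   typically bracket their queries with the ownership range.

      PetscInt    rstart,rend,row,col = 0;
      PetscScalar val;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      row  = rstart;
      ierr = MatGetValues(A,1,&row,1,&col,&val);CHKERRQ(ierr);
*/
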
extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processes are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr); aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

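/*
   Illustrative note (not library code): the Allreduce in MatAssemblyBegin_MPIAIJ()
   requires every process to agree on the insert mode, so mixing INSERT_VALUES and
   ADD_VALUES needs an intermediate flush:

      PetscInt    row = 0,col = 0;
      PetscScalar v = 1.0;
      ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
      v    = 2.0;
      ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
*/
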
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory and thus calling lots of mallocs and slowing
     things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

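/*
   Illustrative usage (not library code): MatZeroRows() takes global row numbers that
   may be owned by any process; the routine above routes each row to its owner and
   then zeroes it locally, optionally placing diag on the diagonal.

      PetscInt rows[2] = {0,7};
      ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);
*/
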
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

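/*
   The multiply above computes yy = A_loc*xx + B_loc*lvec, overlapping the scatter of
   the needed ghost values into lvec with the local (diagonal-block) product.
   Illustrative usage (not library code):

      Vec x,y;
      ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
      ierr = VecSet(x,1.0);CHKERRQ(ierr);
      ierr = MatMult(A,x,y);CHKERRQ(ierr);

   which leaves the row sums of A in y.
*/
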
#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    This only works correctly for square matrices where the subblock A->A is the
    diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

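/*
   Illustrative usage (not library code): both routines above act on the stored blocks
   with no communication; MatGetDiagonal_MPIAIJ() additionally insists on a square
   matrix whose row and column layouts match, as its error tests check.

      Vec d;
      ierr = MatGetVecs(A,PETSC_NULL,&d);CHKERRQ(ierr);
      ierr = MatScale(A,2.0);CHKERRQ(ierr);
      ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
*/
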
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)  {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs space as large as the largest process needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

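/*
   Illustrative usage (not library code): the routine above is reached via MatView()
   on a binary viewer; process 0 funnels the header, row lengths, column indices, and
   values into the file in the standard PETSc binary matrix format. The file name is
   made up for the example.

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(mat,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/
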
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject)mat)->prefix,"-mat_ascii_output_large",&flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

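/*
   Illustrative usage (not library code): the PETSC_VIEWER_ASCII_INFO and
   PETSC_VIEWER_ASCII_INFO_DETAIL branches above can be exercised directly with

      ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
      ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/
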
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscTruth     hasop;

  PetscFunctionBegin;
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

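/*
   Illustrative usage (not library code): the local sweeps above are normally driven
   by PCSOR, but a direct call performing two symmetric local sweeps from a zero
   initial guess might look like (omega = 1.0, fshift = 0.0, one inner iteration):

      ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);
*/
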
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

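/*
   Illustrative usage (not library code): permuting rows and columns by the same
   ordering, here the natural ordering purely as a placeholder; any MatGetOrdering()
   result would do.

      IS  rperm,cperm;
      Mat B;
      ierr = MatGetOrdering(A,MATORDERING_NATURAL,&rperm,&cperm);CHKERRQ(ierr);
      ierr = MatPermute(A,rperm,cperm,&B);CHKERRQ(ierr);
*/
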
"MatGetInfo_MPIAIJ" 1375 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1376 { 1377 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1378 Mat A = mat->A,B = mat->B; 1379 PetscErrorCode ierr; 1380 PetscReal isend[5],irecv[5]; 1381 1382 PetscFunctionBegin; 1383 info->block_size = 1.0; 1384 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1385 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1386 isend[3] = info->memory; isend[4] = info->mallocs; 1387 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1388 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1389 isend[3] += info->memory; isend[4] += info->mallocs; 1390 if (flag == MAT_LOCAL) { 1391 info->nz_used = isend[0]; 1392 info->nz_allocated = isend[1]; 1393 info->nz_unneeded = isend[2]; 1394 info->memory = isend[3]; 1395 info->mallocs = isend[4]; 1396 } else if (flag == MAT_GLOBAL_MAX) { 1397 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1398 info->nz_used = irecv[0]; 1399 info->nz_allocated = irecv[1]; 1400 info->nz_unneeded = irecv[2]; 1401 info->memory = irecv[3]; 1402 info->mallocs = irecv[4]; 1403 } else if (flag == MAT_GLOBAL_SUM) { 1404 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1405 info->nz_used = irecv[0]; 1406 info->nz_allocated = irecv[1]; 1407 info->nz_unneeded = irecv[2]; 1408 info->memory = irecv[3]; 1409 info->mallocs = irecv[4]; 1410 } 1411 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1412 info->fill_ratio_needed = 0; 1413 info->factor_mallocs = 0; 1414 1415 PetscFunctionReturn(0); 1416 } 1417 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "MatSetOption_MPIAIJ" 1420 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1421 { 1422 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1423 PetscErrorCode ierr; 1424 1425 PetscFunctionBegin; 1426 switch (op) { 1427 case MAT_NEW_NONZERO_LOCATIONS: 1428 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1429 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1430 case MAT_KEEP_NONZERO_PATTERN: 1431 case MAT_NEW_NONZERO_LOCATION_ERR: 1432 case MAT_USE_INODES: 1433 case MAT_IGNORE_ZERO_ENTRIES: 1434 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1435 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1436 break; 1437 case MAT_ROW_ORIENTED: 1438 a->roworiented = flg; 1439 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1440 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1441 break; 1442 case MAT_NEW_DIAGONALS: 1443 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1444 break; 1445 case MAT_IGNORE_OFF_PROC_ENTRIES: 1446 a->donotstash = PETSC_TRUE; 1447 break; 1448 case MAT_SYMMETRIC: 1449 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1450 break; 1451 case MAT_STRUCTURALLY_SYMMETRIC: 1452 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1453 break; 1454 case MAT_HERMITIAN: 1455 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1456 break; 1457 case MAT_SYMMETRY_ETERNAL: 1458 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1459 break; 1460 default: 1461 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1462 } 1463 PetscFunctionReturn(0); 1464 } 1465 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "MatGetRow_MPIAIJ" 1468 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1469 { 1470 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1471 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1472 PetscErrorCode ierr; 1473 PetscInt 
i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1474 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1475 PetscInt *cmap,*idx_p; 1476 1477 PetscFunctionBegin; 1478 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1479 mat->getrowactive = PETSC_TRUE; 1480 1481 if (!mat->rowvalues && (idx || v)) { 1482 /* 1483 allocate enough space to hold information from the longest row. 1484 */ 1485 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1486 PetscInt max = 1,tmp; 1487 for (i=0; i<matin->rmap->n; i++) { 1488 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1489 if (max < tmp) { max = tmp; } 1490 } 1491 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1492 } 1493 1494 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1495 lrow = row - rstart; 1496 1497 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1498 if (!v) {pvA = 0; pvB = 0;} 1499 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1500 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1501 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1502 nztot = nzA + nzB; 1503 1504 cmap = mat->garray; 1505 if (v || idx) { 1506 if (nztot) { 1507 /* Sort by increasing column numbers, assuming A and B already sorted */ 1508 PetscInt imark = -1; 1509 if (v) { 1510 *v = v_p = mat->rowvalues; 1511 for (i=0; i<nzB; i++) { 1512 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1513 else break; 1514 } 1515 imark = i; 1516 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1517 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1518 } 1519 if (idx) { 1520 *idx = idx_p = mat->rowindices; 1521 if (imark > -1) { 1522 for (i=0; i<imark; i++) { 1523 idx_p[i] = cmap[cworkB[i]]; 1524 } 1525 } else { 1526 for (i=0; i<nzB; i++) { 1527 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1528 else break; 1529 } 1530 imark = i; 1531 } 1532 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1533 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1534 } 1535 } else { 1536 if (idx) *idx = 0; 1537 if (v) *v = 0; 1538 } 1539 } 1540 *nz = nztot; 1541 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1542 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1543 PetscFunctionReturn(0); 1544 } 1545 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1548 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1549 { 1550 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1551 1552 PetscFunctionBegin; 1553 if (!aij->getrowactive) { 1554 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1555 } 1556 aij->getrowactive = PETSC_FALSE; 1557 PetscFunctionReturn(0); 1558 } 1559 1560 #undef __FUNCT__ 1561 #define __FUNCT__ "MatNorm_MPIAIJ" 1562 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1563 { 1564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1565 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1566 PetscErrorCode ierr; 1567 PetscInt i,j,cstart = mat->cmap->rstart; 1568 PetscReal sum = 0.0; 1569 MatScalar *v; 1570 1571 PetscFunctionBegin; 1572 if (aij->size == 1) { 1573 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1574 } else { 1575 if (type == NORM_FROBENIUS) { 1576 v = amat->a; 1577 for (i=0; i<amat->nz; i++) { 1578 #if defined(PETSC_USE_COMPLEX) 1579 sum += 
PetscRealPart(PetscConj(*v)*(*v)); v++; 1580 #else 1581 sum += (*v)*(*v); v++; 1582 #endif 1583 } 1584 v = bmat->a; 1585 for (i=0; i<bmat->nz; i++) { 1586 #if defined(PETSC_USE_COMPLEX) 1587 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1588 #else 1589 sum += (*v)*(*v); v++; 1590 #endif 1591 } 1592 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1593 *norm = sqrt(*norm); 1594 } else if (type == NORM_1) { /* max column norm */ 1595 PetscReal *tmp,*tmp2; 1596 PetscInt *jj,*garray = aij->garray; 1597 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1598 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1599 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1600 *norm = 0.0; 1601 v = amat->a; jj = amat->j; 1602 for (j=0; j<amat->nz; j++) { 1603 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1604 } 1605 v = bmat->a; jj = bmat->j; 1606 for (j=0; j<bmat->nz; j++) { 1607 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1608 } 1609 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1610 for (j=0; j<mat->cmap->N; j++) { 1611 if (tmp2[j] > *norm) *norm = tmp2[j]; 1612 } 1613 ierr = PetscFree(tmp);CHKERRQ(ierr); 1614 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1615 } else if (type == NORM_INFINITY) { /* max row norm */ 1616 PetscReal ntemp = 0.0; 1617 for (j=0; j<aij->A->rmap->n; j++) { 1618 v = amat->a + amat->i[j]; 1619 sum = 0.0; 1620 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1621 sum += PetscAbsScalar(*v); v++; 1622 } 1623 v = bmat->a + bmat->i[j]; 1624 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1625 sum += PetscAbsScalar(*v); v++; 1626 } 1627 if (sum > ntemp) ntemp = sum; 1628 } 1629 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1630 } else { 1631 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1632 } 1633 } 1634 PetscFunctionReturn(0); 1635 } 1636 1637 #undef __FUNCT__ 1638 #define __FUNCT__ "MatTranspose_MPIAIJ" 1639 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1640 { 1641 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1642 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1643 PetscErrorCode ierr; 1644 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1645 PetscInt cstart=A->cmap->rstart,ncol; 1646 Mat B; 1647 MatScalar *array; 1648 1649 PetscFunctionBegin; 1650 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1651 1652 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1653 ai = Aloc->i; aj = Aloc->j; 1654 bi = Bloc->i; bj = Bloc->j; 1655 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1656 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1657 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1658 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1659 for (i=0; i<ai[ma]; i++){ 1660 d_nnz[aj[i]] ++; 1661 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1662 } 1663 1664 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1665 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1666 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1667 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1668 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1669 } else { 1670 B = *matout; 1671 } 1672 1673 /* copy 
over the A part */ 1674 array = Aloc->a; 1675 row = A->rmap->rstart; 1676 for (i=0; i<ma; i++) { 1677 ncol = ai[i+1]-ai[i]; 1678 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1679 row++; array += ncol; aj += ncol; 1680 } 1681 aj = Aloc->j; 1682 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1683 1684 /* copy over the B part */ 1685 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1686 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1687 array = Bloc->a; 1688 row = A->rmap->rstart; 1689 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1690 cols_tmp = cols; 1691 for (i=0; i<mb; i++) { 1692 ncol = bi[i+1]-bi[i]; 1693 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1694 row++; array += ncol; cols_tmp += ncol; 1695 } 1696 ierr = PetscFree(cols);CHKERRQ(ierr); 1697 1698 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1699 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1701 *matout = B; 1702 } else { 1703 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1704 } 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1710 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1711 { 1712 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1713 Mat a = aij->A,b = aij->B; 1714 PetscErrorCode ierr; 1715 PetscInt s1,s2,s3; 1716 1717 PetscFunctionBegin; 1718 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1719 if (rr) { 1720 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1721 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1722 /* Overlap communication with computation. */ 1723 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1724 } 1725 if (ll) { 1726 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1727 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1728 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1729 } 1730 /* scale the diagonal block */ 1731 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1732 1733 if (rr) { 1734 /* Do a scatter end and then right scale the off-diagonal block */ 1735 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1736 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1737 } 1738 1739 PetscFunctionReturn(0); 1740 } 1741 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1744 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1745 { 1746 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1747 PetscErrorCode ierr; 1748 1749 PetscFunctionBegin; 1750 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1751 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1752 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1753 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1754 PetscFunctionReturn(0); 1755 } 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1758 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1759 { 1760 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1761 PetscErrorCode ierr; 1762 1763 PetscFunctionBegin; 1764 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1765 PetscFunctionReturn(0); 1766 } 1767 1768 #undef __FUNCT__ 1769 #define __FUNCT__ "MatEqual_MPIAIJ" 1770 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1771 { 1772 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1773 Mat a,b,c,d; 1774 
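  /* The comparison is blockwise: the diagonal (A) and off-diagonal (B)
     sequential blocks are compared locally, then the local verdicts are
     combined with a logical-AND reduction so every process returns the
     same answer. */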
PetscTruth flg; 1775 PetscErrorCode ierr; 1776 1777 PetscFunctionBegin; 1778 a = matA->A; b = matA->B; 1779 c = matB->A; d = matB->B; 1780 1781 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1782 if (flg) { 1783 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1784 } 1785 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1786 PetscFunctionReturn(0); 1787 } 1788 1789 #undef __FUNCT__ 1790 #define __FUNCT__ "MatCopy_MPIAIJ" 1791 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1792 { 1793 PetscErrorCode ierr; 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1795 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1796 1797 PetscFunctionBegin; 1798 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1799 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1800 /* because of the column compression in the off-processor part of the matrix a->B, 1801 the number of columns in a->B and b->B may be different, hence we cannot call 1802 the MatCopy() directly on the two parts. If need be, we can provide a more 1803 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1804 then copying the submatrices */ 1805 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1806 } else { 1807 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1808 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1809 } 1810 PetscFunctionReturn(0); 1811 } 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1815 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1816 { 1817 PetscErrorCode ierr; 1818 1819 PetscFunctionBegin; 1820 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 #undef __FUNCT__ 1825 #define __FUNCT__ "MatAXPY_MPIAIJ" 1826 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1827 { 1828 PetscErrorCode ierr; 1829 PetscInt i; 1830 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1831 PetscBLASInt bnz,one=1; 1832 Mat_SeqAIJ *x,*y; 1833 1834 PetscFunctionBegin; 1835 if (str == SAME_NONZERO_PATTERN) { 1836 PetscScalar alpha = a; 1837 x = (Mat_SeqAIJ *)xx->A->data; 1838 y = (Mat_SeqAIJ *)yy->A->data; 1839 bnz = PetscBLASIntCast(x->nz); 1840 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1841 x = (Mat_SeqAIJ *)xx->B->data; 1842 y = (Mat_SeqAIJ *)yy->B->data; 1843 bnz = PetscBLASIntCast(x->nz); 1844 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1845 } else if (str == SUBSET_NONZERO_PATTERN) { 1846 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1847 1848 x = (Mat_SeqAIJ *)xx->B->data; 1849 y = (Mat_SeqAIJ *)yy->B->data; 1850 if (y->xtoy && y->XtoY != xx->B) { 1851 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1852 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1853 } 1854 if (!y->xtoy) { /* get xtoy */ 1855 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1856 y->XtoY = xx->B; 1857 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1858 } 1859 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1860 } else { 1861 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1862 } 1863 PetscFunctionReturn(0); 1864 } 1865 1866 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1867 1868 #undef __FUNCT__ 1869 #define __FUNCT__ "MatConjugate_MPIAIJ" 1870 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1871 { 1872 #if defined(PETSC_USE_COMPLEX) 1873 PetscErrorCode ierr; 1874 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscTruth     row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type  *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type  &level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
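  /* Stash the factored level graph on the new matrix. A PetscContainer is the
     standard way to hang an arbitrary pointer off a PETSc object; MatSolve_MPIAIJ()
     below retrieves it again with PetscObjectQuery((PetscObject)A,"graph",...).
     The container is composed but not destroyed here; its lifetime follows the
     matrix it is attached to. */
  ierr = PetscObjectCompose((PetscObject) (fact), "graph",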
(PetscObject) c); 1968 PetscFunctionReturn(0); 1969 } 1970 1971 #undef __FUNCT__ 1972 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1973 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1974 { 1975 PetscFunctionBegin; 1976 PetscFunctionReturn(0); 1977 } 1978 1979 #undef __FUNCT__ 1980 #define __FUNCT__ "MatSolve_MPIAIJ" 1981 /* 1982 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1983 */ 1984 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1985 { 1986 namespace graph_dist = boost::graph::distributed; 1987 1988 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1989 lgraph_type* lgraph_p; 1990 PetscContainer c; 1991 PetscErrorCode ierr; 1992 1993 PetscFunctionBegin; 1994 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1995 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1996 ierr = VecCopy(b, x);CHKERRQ(ierr); 1997 1998 PetscScalar* array_x; 1999 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2000 PetscInt sx; 2001 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2002 2003 PetscScalar* array_b; 2004 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2005 PetscInt sb; 2006 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2007 2008 lgraph_type& level_graph = *lgraph_p; 2009 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2010 2011 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2012 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2013 ref_x(array_x, boost::extents[num_vertices(graph)]); 2014 2015 typedef boost::iterator_property_map<array_ref_type::iterator, 2016 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2017 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2018 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2019 2020 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2021 2022 PetscFunctionReturn(0); 2023 } 2024 #endif 2025 2026 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2027 PetscInt nzlocal,nsends,nrecvs; 2028 PetscMPIInt *send_rank,*recv_rank; 2029 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2030 PetscScalar *sbuf_a,**rbuf_a; 2031 PetscErrorCode (*MatDestroy)(Mat); 2032 } Mat_Redundant; 2033 2034 #undef __FUNCT__ 2035 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2036 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2037 { 2038 PetscErrorCode ierr; 2039 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2040 PetscInt i; 2041 2042 PetscFunctionBegin; 2043 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2044 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2045 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2046 for (i=0; i<redund->nrecvs; i++){ 2047 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2048 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2049 } 2050 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2051 ierr = PetscFree(redund);CHKERRQ(ierr); 2052 PetscFunctionReturn(0); 2053 } 2054 2055 #undef __FUNCT__ 2056 #define __FUNCT__ "MatDestroy_MatRedundant" 2057 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2058 { 2059 PetscErrorCode ierr; 2060 PetscContainer container; 2061 Mat_Redundant *redund=PETSC_NULL; 2062 2063 PetscFunctionBegin; 2064 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2065 if (container) { 2066 ierr = 
PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
  }
  A->ops->destroy = redund->MatDestroy;
  ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
    }
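    /* The buffers cached in Mat_Redundant are sized for the original nonzero
       pattern, so reuse is only safe while the local nonzero count is
       unchanged; otherwise the caller must rebuild with MAT_INITIAL_MATRIX. */
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 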
Wrong nzlocal"); 2123 2124 nsends = redund->nsends; 2125 nrecvs = redund->nrecvs; 2126 send_rank = redund->send_rank; 2127 recv_rank = redund->recv_rank; 2128 sbuf_nz = redund->sbuf_nz; 2129 rbuf_nz = redund->rbuf_nz; 2130 sbuf_j = redund->sbuf_j; 2131 sbuf_a = redund->sbuf_a; 2132 rbuf_j = redund->rbuf_j; 2133 rbuf_a = redund->rbuf_a; 2134 } 2135 2136 if (reuse == MAT_INITIAL_MATRIX){ 2137 PetscMPIInt subrank,subsize; 2138 PetscInt nleftover,np_subcomm; 2139 /* get the destination processors' id send_rank, nsends and nrecvs */ 2140 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2141 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2142 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2143 np_subcomm = size/nsubcomm; 2144 nleftover = size - nsubcomm*np_subcomm; 2145 nsends = 0; nrecvs = 0; 2146 for (i=0; i<size; i++){ /* i=rank*/ 2147 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2148 send_rank[nsends] = i; nsends++; 2149 recv_rank[nrecvs++] = i; 2150 } 2151 } 2152 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2153 i = size-nleftover-1; 2154 j = 0; 2155 while (j < nsubcomm - nleftover){ 2156 send_rank[nsends++] = i; 2157 i--; j++; 2158 } 2159 } 2160 2161 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2162 for (i=0; i<nleftover; i++){ 2163 recv_rank[nrecvs++] = size-nleftover+i; 2164 } 2165 } 2166 2167 /* allocate sbuf_j, sbuf_a */ 2168 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2169 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2170 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2171 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2172 2173 /* copy mat's local entries into the buffers */ 2174 if (reuse == MAT_INITIAL_MATRIX){ 2175 rownz_max = 0; 2176 rptr = sbuf_j; 2177 cols = sbuf_j + rend-rstart + 1; 2178 vals = sbuf_a; 2179 rptr[0] = 0; 2180 for (i=0; i<rend-rstart; i++){ 2181 row = i + rstart; 2182 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2183 ncols = nzA + nzB; 2184 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2185 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2186 /* load the column indices for this row into cols */ 2187 lwrite = 0; 2188 for (l=0; l<nzB; l++) { 2189 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2190 vals[lwrite] = aworkB[l]; 2191 cols[lwrite++] = ctmp; 2192 } 2193 } 2194 for (l=0; l<nzA; l++){ 2195 vals[lwrite] = aworkA[l]; 2196 cols[lwrite++] = cstart + cworkA[l]; 2197 } 2198 for (l=0; l<nzB; l++) { 2199 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2200 vals[lwrite] = aworkB[l]; 2201 cols[lwrite++] = ctmp; 2202 } 2203 } 2204 vals += ncols; 2205 cols += ncols; 2206 rptr[i+1] = rptr[i] + ncols; 2207 if (rownz_max < ncols) rownz_max = ncols; 2208 } 2209 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2210 } else { /* only copy matrix values into sbuf_a */ 2211 rptr = sbuf_j; 2212 vals = sbuf_a; 2213 rptr[0] = 0; 2214 for (i=0; i<rend-rstart; i++){ 2215 row = i + rstart; 2216 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2217 ncols = nzA + nzB; 2218 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2219 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2220 lwrite = 0; 2221 for (l=0; l<nzB; l++) { 2222 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2223 } 2224 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2225 for (l=0; l<nzB; l++) { 2226 if 
((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2227 } 2228 vals += ncols; 2229 rptr[i+1] = rptr[i] + ncols; 2230 } 2231 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2232 2233 /* send nzlocal to others, and recv other's nzlocal */ 2234 /*--------------------------------------------------*/ 2235 if (reuse == MAT_INITIAL_MATRIX){ 2236 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2237 s_waits2 = s_waits3 + nsends; 2238 s_waits1 = s_waits2 + nsends; 2239 r_waits1 = s_waits1 + nsends; 2240 r_waits2 = r_waits1 + nrecvs; 2241 r_waits3 = r_waits2 + nrecvs; 2242 } else { 2243 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2244 r_waits3 = s_waits3 + nsends; 2245 } 2246 2247 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2248 if (reuse == MAT_INITIAL_MATRIX){ 2249 /* get new tags to keep the communication clean */ 2250 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2251 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2252 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2253 2254 /* post receives of other's nzlocal */ 2255 for (i=0; i<nrecvs; i++){ 2256 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2257 } 2258 /* send nzlocal to others */ 2259 for (i=0; i<nsends; i++){ 2260 sbuf_nz[i] = nzlocal; 2261 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2262 } 2263 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2264 count = nrecvs; 2265 while (count) { 2266 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2267 recv_rank[imdex] = recv_status.MPI_SOURCE; 2268 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2269 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2270 2271 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2272 rbuf_nz[imdex] += i + 2; 2273 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2274 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2275 count--; 2276 } 2277 /* wait on sends of nzlocal */ 2278 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2279 /* send mat->i,j to others, and recv from other's */ 2280 /*------------------------------------------------*/ 2281 for (i=0; i<nsends; i++){ 2282 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2283 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2284 } 2285 /* wait on receives of mat->i,j */ 2286 /*------------------------------*/ 2287 count = nrecvs; 2288 while (count) { 2289 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2290 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2291 count--; 2292 } 2293 /* wait on sends of mat->i,j */ 2294 /*---------------------------*/ 2295 if (nsends) { 2296 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2297 } 2298 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2299 2300 /* post receives, send and receive mat->a */ 2301 /*----------------------------------------*/ 2302 for (imdex=0; 
imdex<nrecvs; imdex++) { 2303 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2304 } 2305 for (i=0; i<nsends; i++){ 2306 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2307 } 2308 count = nrecvs; 2309 while (count) { 2310 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2311 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2312 count--; 2313 } 2314 if (nsends) { 2315 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2316 } 2317 2318 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2319 2320 /* create redundant matrix */ 2321 /*-------------------------*/ 2322 if (reuse == MAT_INITIAL_MATRIX){ 2323 /* compute rownz_max for preallocation */ 2324 for (imdex=0; imdex<nrecvs; imdex++){ 2325 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2326 rptr = rbuf_j[imdex]; 2327 for (i=0; i<j; i++){ 2328 ncols = rptr[i+1] - rptr[i]; 2329 if (rownz_max < ncols) rownz_max = ncols; 2330 } 2331 } 2332 2333 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2334 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2335 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2336 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2337 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2338 } else { 2339 C = *matredundant; 2340 } 2341 2342 /* insert local matrix entries */ 2343 rptr = sbuf_j; 2344 cols = sbuf_j + rend-rstart + 1; 2345 vals = sbuf_a; 2346 for (i=0; i<rend-rstart; i++){ 2347 row = i + rstart; 2348 ncols = rptr[i+1] - rptr[i]; 2349 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2350 vals += ncols; 2351 cols += ncols; 2352 } 2353 /* insert received matrix entries */ 2354 for (imdex=0; imdex<nrecvs; imdex++){ 2355 rstart = rowrange[recv_rank[imdex]]; 2356 rend = rowrange[recv_rank[imdex]+1]; 2357 rptr = rbuf_j[imdex]; 2358 cols = rbuf_j[imdex] + rend-rstart + 1; 2359 vals = rbuf_a[imdex]; 2360 for (i=0; i<rend-rstart; i++){ 2361 row = i + rstart; 2362 ncols = rptr[i+1] - rptr[i]; 2363 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2364 vals += ncols; 2365 cols += ncols; 2366 } 2367 } 2368 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2369 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2370 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2371 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2372 if (reuse == MAT_INITIAL_MATRIX){ 2373 PetscContainer container; 2374 *matredundant = C; 2375 /* create a supporting struct and attach it to C for reuse */ 2376 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2377 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2378 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2379 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2380 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2381 2382 redund->nzlocal = nzlocal; 2383 redund->nsends = nsends; 2384 redund->nrecvs = nrecvs; 2385 redund->send_rank = send_rank; 2386 redund->recv_rank = recv_rank; 2387 redund->sbuf_nz = sbuf_nz; 2388 redund->rbuf_nz = rbuf_nz; 2389 redund->sbuf_j = sbuf_j; 
sbuf_j;
    redund->sbuf_a = sbuf_a;
    redund->rbuf_j = rbuf_j;
    redund->rbuf_a = rbuf_a;

    redund->MatDestroy = C->ops->destroy;
    C->ops->destroy    = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {
    ierr = PetscFree(idxb);CHKERRQ(ierr);
  }
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {  /* loop over the local rows, i.e. rmap->n entries of v */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {
    ierr = PetscFree(idxb);CHKERRQ(ierr);
  }
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  /* the work vectors are purely local, so build them on PETSC_COMM_SELF
     (a VECSEQ cannot be created on a parallel communicator) */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
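  /* mat->B stores the off-diagonal block with compressed local column numbers;
     the indices it returns are translated back to global columns through
     mat->garray (cmap) in the merge loop below. */
  ierr =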
MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2504 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2505 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2506 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2507 for(r = 0; r < n; ++r) { 2508 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2509 a[r] = diagA[r]; 2510 idx[r] = cstart + diagIdx[r]; 2511 } else { 2512 a[r] = offdiagA[r]; 2513 idx[r] = cmap[offdiagIdx[r]]; 2514 } 2515 } 2516 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2517 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2518 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2519 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2520 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2521 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2522 PetscFunctionReturn(0); 2523 } 2524 2525 #undef __FUNCT__ 2526 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2527 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2528 { 2529 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2530 PetscInt n = A->rmap->n; 2531 PetscInt cstart = A->cmap->rstart; 2532 PetscInt *cmap = mat->garray; 2533 PetscInt *diagIdx, *offdiagIdx; 2534 Vec diagV, offdiagV; 2535 PetscScalar *a, *diagA, *offdiagA; 2536 PetscInt r; 2537 PetscErrorCode ierr; 2538 2539 PetscFunctionBegin; 2540 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2541 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2542 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2543 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2544 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2545 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2546 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2547 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2548 for(r = 0; r < n; ++r) { 2549 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2550 a[r] = diagA[r]; 2551 idx[r] = cstart + diagIdx[r]; 2552 } else { 2553 a[r] = offdiagA[r]; 2554 idx[r] = cmap[offdiagIdx[r]]; 2555 } 2556 } 2557 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2558 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2559 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2560 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2561 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2562 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2563 PetscFunctionReturn(0); 2564 } 2565 2566 #undef __FUNCT__ 2567 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2568 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2569 { 2570 PetscErrorCode ierr; 2571 Mat *dummy; 2572 2573 PetscFunctionBegin; 2574 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2575 *newmat = *dummy; 2576 ierr = PetscFree(dummy);CHKERRQ(ierr); 2577 PetscFunctionReturn(0); 2578 } 2579 2580 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2581 /* -------------------------------------------------------------------*/ 2582 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2583 MatGetRow_MPIAIJ, 2584 MatRestoreRow_MPIAIJ, 2585 MatMult_MPIAIJ, 2586 /* 4*/ MatMultAdd_MPIAIJ, 2587 MatMultTranspose_MPIAIJ, 2588 MatMultTransposeAdd_MPIAIJ, 2589 #ifdef PETSC_HAVE_PBGL 2590 MatSolve_MPIAIJ, 2591 #else 2592 0, 2593 #endif 2594 0, 2595 0, 2596 /*10*/ 0, 2597 0, 2598 0, 2599 MatSOR_MPIAIJ, 2600 MatTranspose_MPIAIJ, 2601 /*15*/ MatGetInfo_MPIAIJ, 2602 MatEqual_MPIAIJ, 2603 
MatGetDiagonal_MPIAIJ, 2604 MatDiagonalScale_MPIAIJ, 2605 MatNorm_MPIAIJ, 2606 /*20*/ MatAssemblyBegin_MPIAIJ, 2607 MatAssemblyEnd_MPIAIJ, 2608 MatSetOption_MPIAIJ, 2609 MatZeroEntries_MPIAIJ, 2610 /*24*/ MatZeroRows_MPIAIJ, 2611 0, 2612 #ifdef PETSC_HAVE_PBGL 2613 0, 2614 #else 2615 0, 2616 #endif 2617 0, 2618 0, 2619 /*29*/ MatSetUpPreallocation_MPIAIJ, 2620 #ifdef PETSC_HAVE_PBGL 2621 0, 2622 #else 2623 0, 2624 #endif 2625 0, 2626 0, 2627 0, 2628 /*34*/ MatDuplicate_MPIAIJ, 2629 0, 2630 0, 2631 0, 2632 0, 2633 /*39*/ MatAXPY_MPIAIJ, 2634 MatGetSubMatrices_MPIAIJ, 2635 MatIncreaseOverlap_MPIAIJ, 2636 MatGetValues_MPIAIJ, 2637 MatCopy_MPIAIJ, 2638 /*44*/ MatGetRowMax_MPIAIJ, 2639 MatScale_MPIAIJ, 2640 0, 2641 0, 2642 0, 2643 /*49*/ MatSetBlockSize_MPIAIJ, 2644 0, 2645 0, 2646 0, 2647 0, 2648 /*54*/ MatFDColoringCreate_MPIAIJ, 2649 0, 2650 MatSetUnfactored_MPIAIJ, 2651 MatPermute_MPIAIJ, 2652 0, 2653 /*59*/ MatGetSubMatrix_MPIAIJ, 2654 MatDestroy_MPIAIJ, 2655 MatView_MPIAIJ, 2656 0, 2657 0, 2658 /*64*/ 0, 2659 0, 2660 0, 2661 0, 2662 0, 2663 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2664 MatGetRowMinAbs_MPIAIJ, 2665 0, 2666 MatSetColoring_MPIAIJ, 2667 #if defined(PETSC_HAVE_ADIC) 2668 MatSetValuesAdic_MPIAIJ, 2669 #else 2670 0, 2671 #endif 2672 MatSetValuesAdifor_MPIAIJ, 2673 /*75*/ MatFDColoringApply_AIJ, 2674 0, 2675 0, 2676 0, 2677 0, 2678 /*80*/ 0, 2679 0, 2680 0, 2681 /*83*/ MatLoad_MPIAIJ, 2682 0, 2683 0, 2684 0, 2685 0, 2686 0, 2687 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2688 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2689 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2690 MatPtAP_Basic, 2691 MatPtAPSymbolic_MPIAIJ, 2692 /*94*/ MatPtAPNumeric_MPIAIJ, 2693 0, 2694 0, 2695 0, 2696 0, 2697 /*99*/ 0, 2698 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2699 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2700 MatConjugate_MPIAIJ, 2701 0, 2702 /*104*/MatSetValuesRow_MPIAIJ, 2703 MatRealPart_MPIAIJ, 2704 MatImaginaryPart_MPIAIJ, 2705 0, 2706 0, 2707 /*109*/0, 2708 MatGetRedundantMatrix_MPIAIJ, 2709 MatGetRowMin_MPIAIJ, 2710 0, 2711 0, 2712 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2713 0, 2714 0, 2715 0, 2716 0, 2717 0 2718 }; 2719 2720 /* ----------------------------------------------------------------------------------------*/ 2721 2722 EXTERN_C_BEGIN 2723 #undef __FUNCT__ 2724 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2725 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2726 { 2727 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2728 PetscErrorCode ierr; 2729 2730 PetscFunctionBegin; 2731 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2732 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2733 PetscFunctionReturn(0); 2734 } 2735 EXTERN_C_END 2736 2737 EXTERN_C_BEGIN 2738 #undef __FUNCT__ 2739 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2740 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2741 { 2742 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2743 PetscErrorCode ierr; 2744 2745 PetscFunctionBegin; 2746 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2747 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2748 PetscFunctionReturn(0); 2749 } 2750 EXTERN_C_END 2751 2752 #include "petscpc.h" 2753 EXTERN_C_BEGIN 2754 #undef __FUNCT__ 2755 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2756 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2757 { 2758 Mat_MPIAIJ *b; 2759 PetscErrorCode ierr; 2760 PetscInt i; 2761 2762 PetscFunctionBegin; 2763 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2764 if (o_nz == PETSC_DEFAULT || o_nz 
== PETSC_DECIDE) o_nz = 2; 2765 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2766 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2767 2768 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2769 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2770 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2771 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2772 if (d_nnz) { 2773 for (i=0; i<B->rmap->n; i++) { 2774 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2775 } 2776 } 2777 if (o_nnz) { 2778 for (i=0; i<B->rmap->n; i++) { 2779 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2780 } 2781 } 2782 b = (Mat_MPIAIJ*)B->data; 2783 2784 if (!B->preallocated) { 2785 /* Explicitly create 2 MATSEQAIJ matrices. */ 2786 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2787 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2788 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2789 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2790 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2791 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2792 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2793 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2794 } 2795 2796 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2797 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2798 B->preallocated = PETSC_TRUE; 2799 PetscFunctionReturn(0); 2800 } 2801 EXTERN_C_END 2802 2803 #undef __FUNCT__ 2804 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2805 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2806 { 2807 Mat mat; 2808 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2809 PetscErrorCode ierr; 2810 2811 PetscFunctionBegin; 2812 *newmat = 0; 2813 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2814 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2815 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2816 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2817 a = (Mat_MPIAIJ*)mat->data; 2818 2819 mat->factor = matin->factor; 2820 mat->rmap->bs = matin->rmap->bs; 2821 mat->assembled = PETSC_TRUE; 2822 mat->insertmode = NOT_SET_VALUES; 2823 mat->preallocated = PETSC_TRUE; 2824 2825 a->size = oldmat->size; 2826 a->rank = oldmat->rank; 2827 a->donotstash = oldmat->donotstash; 2828 a->roworiented = oldmat->roworiented; 2829 a->rowindices = 0; 2830 a->rowvalues = 0; 2831 a->getrowactive = PETSC_FALSE; 2832 2833 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2834 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2835 2836 if (oldmat->colmap) { 2837 #if defined (PETSC_USE_CTABLE) 2838 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2839 #else 2840 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2841 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2842 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2843 #endif 2844 } else a->colmap = 0; 2845 if (oldmat->garray) { 2846 PetscInt len; 2847 len = oldmat->B->cmap->n; 2848 ierr = 
PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2849 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2850 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2851 } else a->garray = 0; 2852 2853 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2854 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2855 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2856 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2857 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2858 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2859 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2860 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2861 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2862 *newmat = mat; 2863 PetscFunctionReturn(0); 2864 } 2865 2866 #undef __FUNCT__ 2867 #define __FUNCT__ "MatLoad_MPIAIJ" 2868 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2869 { 2870 Mat A; 2871 PetscScalar *vals,*svals; 2872 MPI_Comm comm = ((PetscObject)viewer)->comm; 2873 MPI_Status status; 2874 PetscErrorCode ierr; 2875 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2876 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2877 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2878 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2879 PetscInt cend,cstart,n,*rowners; 2880 int fd; 2881 2882 PetscFunctionBegin; 2883 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2884 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2885 if (!rank) { 2886 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2887 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2888 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2889 } 2890 2891 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2892 M = header[1]; N = header[2]; 2893 /* determine ownership of all rows */ 2894 m = M/size + ((M % size) > rank); 2895 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2896 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2897 2898 /* First process needs enough room for process with most rows */ 2899 if (!rank) { 2900 mmax = rowners[1]; 2901 for (i=2; i<size; i++) { 2902 mmax = PetscMax(mmax,rowners[i]); 2903 } 2904 } else mmax = m; 2905 2906 rowners[0] = 0; 2907 for (i=2; i<=size; i++) { 2908 rowners[i] += rowners[i-1]; 2909 } 2910 rstart = rowners[rank]; 2911 rend = rowners[rank+1]; 2912 2913 /* distribute row lengths to all processors */ 2914 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2915 if (!rank) { 2916 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2917 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2918 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2919 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2920 for (j=0; j<m; j++) { 2921 procsnz[0] += ourlens[j]; 2922 } 2923 for (i=1; i<size; i++) { 2924 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2925 /* calculate the number of nonzeros on each processor */ 2926 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2927 procsnz[i] += rowlengths[j]; 2928 } 2929 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2930 ierr = 
MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2931 } 2932 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2933 } else { 2934 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2935 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2936 } 2937 2938 if (!rank) { 2939 /* determine max buffer needed and allocate it */ 2940 maxnz = 0; 2941 for (i=0; i<size; i++) { 2942 maxnz = PetscMax(maxnz,procsnz[i]); 2943 } 2944 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2945 2946 /* read in my part of the matrix column indices */ 2947 nz = procsnz[0]; 2948 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2949 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2950 2951 /* read in every one elses and ship off */ 2952 for (i=1; i<size; i++) { 2953 nz = procsnz[i]; 2954 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2955 mpicnt = PetscMPIIntCast(nz); 2956 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2957 } 2958 ierr = PetscFree(cols);CHKERRQ(ierr); 2959 } else { 2960 /* determine buffer space needed for message */ 2961 nz = 0; 2962 for (i=0; i<m; i++) { 2963 nz += ourlens[i]; 2964 } 2965 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2966 2967 /* receive message of column indices*/ 2968 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2969 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2970 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2971 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2972 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2973 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2974 } 2975 2976 /* determine column ownership if matrix is not square */ 2977 if (N != M) { 2978 n = N/size + ((N % size) > rank); 2979 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2980 cstart = cend - n; 2981 } else { 2982 cstart = rstart; 2983 cend = rend; 2984 n = cend - cstart; 2985 } 2986 2987 /* loop over local rows, determining number of off diagonal entries */ 2988 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2989 jj = 0; 2990 for (i=0; i<m; i++) { 2991 for (j=0; j<ourlens[i]; j++) { 2992 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2993 jj++; 2994 } 2995 } 2996 2997 /* create our matrix */ 2998 for (i=0; i<m; i++) { 2999 ourlens[i] -= offlens[i]; 3000 } 3001 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 3002 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 3003 ierr = MatSetType(A,type);CHKERRQ(ierr); 3004 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 3005 3006 for (i=0; i<m; i++) { 3007 ourlens[i] += offlens[i]; 3008 } 3009 3010 if (!rank) { 3011 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3012 3013 /* read in my part of the matrix numerical values */ 3014 nz = procsnz[0]; 3015 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3016 3017 /* insert into matrix */ 3018 jj = rstart; 3019 smycols = mycols; 3020 svals = vals; 3021 for (i=0; i<m; i++) { 3022 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3023 smycols += ourlens[i]; 3024 svals += ourlens[i]; 3025 jj++; 3026 } 3027 3028 /* read in other processors and ship out */ 3029 for (i=1; i<size; i++) { 3030 
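/* root reads process i's slab of values from the file into the shared
         vals buffer and ships it off immediately; the matching MPI_Recv is
         posted in the else-branch below */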
nz = procsnz[i]; 3031 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3032 mpicnt = PetscMPIIntCast(nz); 3033 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3034 } 3035 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3036 } else { 3037 /* receive numeric values */ 3038 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3039 3040 /* receive message of values*/ 3041 mpicnt = PetscMPIIntCast(nz); 3042 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3043 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3044 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 3045 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 3046 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3047 3048 /* insert into matrix */ 3049 jj = rstart; 3050 smycols = mycols; 3051 svals = vals; 3052 for (i=0; i<m; i++) { 3053 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3054 smycols += ourlens[i]; 3055 svals += ourlens[i]; 3056 jj++; 3057 } 3058 } 3059 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3060 ierr = PetscFree(vals);CHKERRQ(ierr); 3061 ierr = PetscFree(mycols);CHKERRQ(ierr); 3062 ierr = PetscFree(rowners);CHKERRQ(ierr); 3063 3064 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3065 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3066 *newmat = A; 3067 PetscFunctionReturn(0); 3068 } 3069 3070 #undef __FUNCT__ 3071 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3072 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3073 { 3074 PetscErrorCode ierr; 3075 IS iscol_local; 3076 PetscInt csize; 3077 3078 PetscFunctionBegin; 3079 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3080 if (call == MAT_REUSE_MATRIX) { 3081 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3082 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3083 } else { 3084 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3085 } 3086 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3087 if (call == MAT_INITIAL_MATRIX) { 3088 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3089 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3090 } 3091 PetscFunctionReturn(0); 3092 } 3093 3094 #undef __FUNCT__ 3095 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3096 /* 3097 Not great since it makes two copies of the submatrix, first an SeqAIJ 3098 in local and then by concatenating the local matrices the end result. 3099 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3100 3101 Note: This requires a sequential iscol with all indices. 
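   A sketch of the calling pattern (assuming the usual PETSc usage, not code
   from this routine): the caller gathers the distributed column IS onto each
   process and then invokes this routine, e.g.

     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
     ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,
                                           MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);

   which is exactly what MatGetSubMatrix_MPIAIJ() above does before composing
   the gathered IS onto the result for later MAT_REUSE_MATRIX calls.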
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
      portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero. See the users manual for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
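
   As a concrete sketch (illustrative code only, with error checking via
   CHKERRQ() omitted; the d_nnz/o_nnz values are the proc0 values worked
   out below), the call sequence on proc0 might read:

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);            /* m=3, n=3, M=8, N=8 on proc0 */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve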

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
     row-major ordering,
     i.e., for the following matrix, the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0. Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.
   This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
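
   Putting it together, the creation call on proc0 of this example might read
   as follows (an illustrative sketch; error checking omitted):

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};  /* the proc0 values above */
     MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A); /* m=3, n=3, M=8, N=8 */
.ve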

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr =
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix on EACH processor MUST be the same.
   The input matrix is destroyed by this routine once its values have been merged into outmat.
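
   A typical use might look as follows (an illustrative sketch; Aseq denotes a fully
   assembled local SeqAIJ matrix on each process, and error checking is omitted):

.vb
     Mat Aseq,Apar;
     /* ... each process assembles its local rows into Aseq ... */
     MatMerge(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);
     /* Aseq has been destroyed by the call; only Apar may be used from here on */
.ve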

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
    if (n == PETSC_DECIDE){
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    ierr = MPI_Scan(&m,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0;i<m;i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return an MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX){
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A?
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);

    ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr); /* freed only when the container existed; merge is uninitialized otherwise */
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "../src/mat/utils/freespace.h"
#include "petscbt.h"

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
/*@C
      MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrices
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix on each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm=((PetscObject)mpimat)->comm;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat must be created with MatMerge_SeqsToMPISymbolic()"); /* merge would otherwise be used uninitialized */
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj + bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++){
      if (*(bj_i
 + j) == aj[nextaj]){ /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++){
          if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  M      = merge->rowmap->N;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
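      /* provisional value; if anything is sent to [proc], len_si[proc] is recomputed below as 2*(nrows+1), counting only nonempty rows */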
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros (j entries) to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of the k-th message to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (local to the destination's ownership range)
         [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i - owners[proc];       /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr =
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr =
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@
     MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of an MPIAIJ matrix

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A with columns below the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A with columns above the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A with columns below the diagonal block */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A with columns above the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and the NON-ZERO columns of an MPIAIJ matrix

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
   MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows of A and its NONZERO columns

   Not Collective

   Input Parameters:
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS isrowa,iscola;
  Mat *aloc;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = A->cmap->rstart;
    cmap = a->garray;
    nzA = a->A->cmap->n;  /* number of columns in the diagonal block */
    nzB = a->B->cmap->n;  /* number of nonzero columns in the off-diagonal block */
    ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    /* list the nonzero columns in global order: off-diagonal columns below the
       diagonal block, the diagonal block itself, then the remaining columns */
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
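
/*
   A minimal usage sketch for MatGetLocalMatCondensed(), letting the routine
   choose the local rows and nonzero columns itself (A_loc is illustrative):

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/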
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+  rowb, colb - index sets of rows and columns of B to extract
.  brstart - row index of B_seq at which the rows taken from B's own local rows begin
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS isrowb,iscolb;
  Mat *bseq;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = A->cmap->rstart;
    cmap = a->garray;
    nzA = a->A->cmap->n;
    nzB = a->B->cmap->n;
    ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
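
/*
   A minimal usage sketch for MatGetBrowsOfAcols(), keeping the index sets so a
   later call can reuse the symbolic information (names illustrative):

     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ...   B's values change, nonzero structure stays fixed
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
*/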
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
   MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
   of the OFF-DIAGONAL portion of local A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.  startsj - starting points of the outgoing messages in B's sending j-array, saved for MAT_REUSE (or PETSC_NULL)
.  startsj_r - similar to startsj, for the incoming messages
-  bufa_ptr - buffer for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)

   Output Parameter:
.  B_oth - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode ierr;
  Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ *b_oth;
  VecScatter ctx=a->Mvctx;
  MPI_Comm comm=((PetscObject)ctx)->comm;
  PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar *rvalues,*svalues;
  MatScalar *b_otha,*bufa,*bufA;
  PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
  MPI_Status *sstatus,rstatus;
  PetscMPIInt jj;
  PetscInt *cols,sbs,rbs;
  PetscScalar *vals;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues = gen_from->values; /* will hold the lengths of the rows to be received */
  svalues = gen_to->values;   /* will hold the lengths of the rows to be sent */
  nrecvs = gen_from->n;
  nsends = gen_to->n;

  ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow = gen_to->indices; /* local row indices to be sent */
  sstarts = gen_to->starts;
  sprocs = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs = gen_from->procs;
  rbs = gen_from->bs;

  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
    sstartsj[0] = 0; rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++){
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* row length */
          rowlen[j*sbs+l] = ncols;
          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = *startsj_r;
    bufa = *bufa_ptr;
    b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha = b_oth->a;
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
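
/*
   A minimal usage sketch for MatGetBrowsOfAoCols(), keeping the communication
   buffers so a second call only moves numerical values (names illustrative):

     PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ...   B's values change, nonzero structure stays fixed
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
*/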
#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

   Not Collective

   Input Parameter:
.  A - The matrix in mpiaij format

   Output Parameters:
+  lvec - The local vector holding off-process values from the argument to a matrix-vector product
.  colmap - A map from global column index to local index into lvec
-  multScatter - A scatter from the argument of a matrix-vector product to lvec

   Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
   Computes C = A*B as (B'*A')', since computing A*B directly with a
   dense A and an AIJ B is untenable

              n                 p                   p
        (           )     (           )       (           )
      m (     A     )  * n(     B     )  =  m (     C     )
        (           )     (           )       (           )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt m=A->rmap->n,n=B->cmap->n;
  Mat Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
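
/*
   A sketch of the equivalent user-level call, assuming A is of type MATMPIDENSE
   and B of type MATMPIAIJ; MatMatMult() reaches MatMatMult_MPIDense_MPIAIJ()
   through the "MatMatMult_mpidense_mpiaij_C" function composed on the matrix
   in MatCreate_MPIAIJ() below:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/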
EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.  -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscErrorCode ierr;
  PetscMPIInt size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs = 1;
  B->assembled = PETSC_FALSE;
  B->mapping = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build stash to cache off-processor entries formed during assembly */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash = PETSC_FALSE;
  b->colmap = 0;
  b->garray = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix-vector multiply */
  b->lvec = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices = 0;
  b->rowvalues = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                           "MatGetFactor_mpiaij_mumps",
                                           MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                           "MatGetFactor_mpiaij_pastix",
                                           MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                           "MatConvert_MPIAIJ_MPICSRPERM",
                                           MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                           "MatConvert_MPIAIJ_MPICRL",
                                           MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                           "MatConvert_MPIAIJ_MPISBAIJ",
                                           MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
   MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix

       This sets local rows and cannot be used to set off-processor values.

       You cannot later use MatSetValues() to change values in this matrix.
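
       Example with two processes, each owning one row of the 2x2 matrix
       [1 2; 3 4]; this is only an illustrative sketch, and because the arrays
       are not copied they must stay valid for the life of the matrix. Note the
       j indices are local to the diagonal block while the oj indices are
       global, matching the cmap->N-wide off-diagonal block created below:

         PetscInt    i[2] = {0,1},oi[2] = {0,1},j[1],oj[1];
         PetscScalar a[1],oa[1];
         PetscMPIInt rank;
         Mat         mat;

         ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
         j[0] = 0; /* the diagonal block has a single local column */
         if (!rank) {a[0] = 1.0; oj[0] = 1; oa[0] = 2.0;} /* row 0: diagonal 1, off-diagonal 2 */
         else       {a[0] = 4.0; oj[0] = 0; oa[0] = 3.0;} /* row 1: diagonal 4, off-diagonal 3 */
         ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);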

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat mat = *mmat;
  PetscInt m = *mm, n = *mn;
  InsertMode addv = *maddv;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;
  PetscErrorCode ierr;

  ierr = MatPreallocated(mat);CHKERRQ(ierr);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth roworiented = aij->roworiented;

  /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
  Mat A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar *aa = a->a;
  PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar *ba = b->a;

  PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      lastcol1 = -1;
      rp1 = aj + ai[row];
      ap1 = aa + ai[row];
      rmax1 = aimax[row];
      nrow1 = ailen[row];
      low1 = 0;
      high1 = nrow1;
      lastcol2 = -1;
      rp2 = bj + bi[row];
      ap2 = ba + bi[row];
      rmax2 = bimax[row];
      nrow2 = bilen[row];
      low2 = 0;
      high2 = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              rp2 = bj + bi[row];
              ap2 = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2 = 0;
              high2 = nrow2;
              bm = aij->B->rmap->n;
              ba = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END
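
/*
   For reference, the Fortran-callable routine above is the moral equivalent of
   the C call

     ierr = MatSetValues(mat,m,im,n,in,v,addv);CHKERRQ(ierr);

   except that, being a void function, it reports failure through CHKERRABORT()
   via the redefined macros above. How the Fortran interface dispatches directly
   to this routine is handled elsewhere and is not shown in this file.
*/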