1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatDistribute_MPIAIJ" 7 /* 8 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 9 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 10 11 Only for square matrices 12 */ 13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 14 { 15 PetscMPIInt rank,size; 16 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 17 PetscErrorCode ierr; 18 Mat mat; 19 Mat_SeqAIJ *gmata; 20 PetscMPIInt tag; 21 MPI_Status status; 22 PetscTruth aij; 23 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 24 25 PetscFunctionBegin; 26 CHKMEMQ; 27 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 28 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 29 if (!rank) { 30 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 31 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 32 } 33 if (reuse == MAT_INITIAL_MATRIX) { 34 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 35 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 36 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 37 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 38 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 39 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 40 rowners[0] = 0; 41 for (i=2; i<=size; i++) { 42 rowners[i] += rowners[i-1]; 43 } 44 rstart = rowners[rank]; 45 rend = rowners[rank+1]; 46 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 47 if (!rank) { 48 gmata = (Mat_SeqAIJ*) gmat->data; 49 /* send row lengths to all processors */ 50 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 51 for (i=1; i<size; i++) { 52 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 53 } 54 /* determine number diagonal and off-diagonal counts */ 55 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 56 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 57 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 58 jj = 0; 59 for (i=0; i<m; i++) { 60 for (j=0; j<dlens[i]; j++) { 61 if (gmata->j[jj] < rstart) ld[i]++; 62 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 63 jj++; 64 } 65 } 66 /* send column indices to other processes */ 67 for (i=1; i<size; i++) { 68 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 69 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 70 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 } 72 73 /* send numerical values to other processes */ 74 for (i=1; i<size; i++) { 75 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 76 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 77 } 78 gmataa = gmata->a; 79 gmataj = gmata->j; 80 81 } else { 82 /* receive row lengths */ 83 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 84 /* receive column indices */ 85 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 86 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 87 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 88 /* determine number diagonal and off-diagonal counts */ 89 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 90 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 91 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 92 jj = 0; 93 for (i=0; i<m; i++) { 94 for (j=0; j<dlens[i]; j++) { 95 if (gmataj[jj] < rstart) ld[i]++; 96 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 97 jj++; 98 } 99 } 100 /* receive numerical values */ 101 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 103 } 104 /* set preallocation */ 105 for (i=0; i<m; i++) { 106 dlens[i] -= olens[i]; 107 } 108 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 109 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 110 111 for (i=0; i<m; i++) { 112 dlens[i] += olens[i]; 113 } 114 cnt = 0; 115 for (i=0; i<m; i++) { 116 row = rstart + i; 117 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 118 cnt += dlens[i]; 119 } 120 if (rank) { 121 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 122 } 123 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 124 ierr = PetscFree(rowners);CHKERRQ(ierr); 125 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 126 *inmat = mat; 127 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 128 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 129 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 130 mat = *inmat; 131 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 132 if (!rank) { 133 /* send numerical values to other processes */ 134 gmata = (Mat_SeqAIJ*) gmat->data; 135 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 136 gmataa = gmata->a; 137 for (i=1; i<size; i++) { 138 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 139 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 140 } 141 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 142 } else { 143 /* receive numerical values from process 0*/ 144 nz = Ad->nz + Ao->nz; 145 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 146 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 147 } 148 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 149 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 150 ad = Ad->a; 151 ao = Ao->a; 152 if (mat->rmap->n) { 153 i = 0; 154 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 155 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 156 } 157 for (i=1; i<mat->rmap->n; i++) { 158 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 159 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 160 } 161 i--; 162 if (mat->rmap->n) { 163 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 164 } 165 if (rank) { 166 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 167 } 168 } 169 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 170 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 CHKMEMQ; 172 PetscFunctionReturn(0); 173 } 174 175 /* 176 Local utility routine that creates a mapping from the global column 177 number to the local number in the off-diagonal part of the local 178 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 179 a slightly higher hash table cost; without it it is not scalable (each processor 180 has an order N integer array but is fast to acess. 181 */ 182 #undef __FUNCT__ 183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 185 { 186 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 187 PetscErrorCode ierr; 188 PetscInt n = aij->B->cmap->n,i; 189 190 PetscFunctionBegin; 191 #if defined (PETSC_USE_CTABLE) 192 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 193 for (i=0; i<n; i++){ 194 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 195 } 196 #else 197 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 198 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 199 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 200 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 201 #endif 202 PetscFunctionReturn(0); 203 } 204 205 206 #define CHUNKSIZE 15 207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 208 { \ 209 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 210 lastcol1 = col;\ 211 while (high1-low1 > 5) { \ 212 t = (low1+high1)/2; \ 213 if (rp1[t] > col) high1 = t; \ 214 else low1 = t; \ 215 } \ 216 for (_i=low1; _i<high1; _i++) { \ 217 if (rp1[_i] > col) break; \ 218 if (rp1[_i] == col) { \ 219 if (addv == ADD_VALUES) ap1[_i] += value; \ 220 else ap1[_i] = value; \ 221 goto a_noinsert; \ 222 } \ 223 } \ 224 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 225 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 226 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 227 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 228 N = nrow1++ - 1; a->nz++; high1++; \ 229 /* shift up all the later entries in this row */ \ 230 for (ii=N; ii>=_i; ii--) { \ 231 rp1[ii+1] = rp1[ii]; \ 232 ap1[ii+1] = ap1[ii]; \ 233 } \ 234 rp1[_i] = col; \ 235 ap1[_i] = value; \ 236 a_noinsert: ; \ 237 ailen[row] = nrow1; \ 238 } 239 240 241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 242 { \ 243 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 244 lastcol2 = col;\ 245 while (high2-low2 > 5) { \ 246 t = (low2+high2)/2; \ 247 if (rp2[t] > col) high2 = t; \ 248 else low2 = t; \ 249 } \ 250 for (_i=low2; _i<high2; _i++) { \ 251 if (rp2[_i] > col) break; \ 252 if (rp2[_i] == col) { \ 253 if (addv == ADD_VALUES) ap2[_i] += value; \ 254 else ap2[_i] = value; \ 255 goto b_noinsert; \ 256 } \ 257 } \ 258 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 259 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 260 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 261 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 262 N = nrow2++ - 1; b->nz++; high2++; \ 263 /* shift up all the later entries in this row */ \ 264 for (ii=N; ii>=_i; ii--) { \ 265 rp2[ii+1] = rp2[ii]; \ 266 ap2[ii+1] = ap2[ii]; \ 267 } \ 268 rp2[_i] = col; \ 269 ap2[_i] = value; \ 270 b_noinsert: ; \ 271 bilen[row] = nrow2; \ 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 277 { 278 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 279 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 280 PetscErrorCode ierr; 281 PetscInt l,*garray = mat->garray,diag; 282 283 PetscFunctionBegin; 284 /* code only works for square matrices A */ 285 286 /* find size of row to the left of the diagonal part */ 287 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 288 row = row - diag; 289 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 290 if (garray[b->j[b->i[row]+l]] > diag) break; 291 } 292 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 293 294 /* diagonal part */ 295 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 296 297 /* right of diagonal part */ 298 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 #undef __FUNCT__ 303 #define __FUNCT__ "MatSetValues_MPIAIJ" 304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 305 { 306 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 307 PetscScalar value; 308 PetscErrorCode ierr; 309 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 310 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 311 PetscTruth roworiented = aij->roworiented; 312 313 /* Some Variables required in the macro */ 314 Mat A = aij->A; 315 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 316 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 317 MatScalar *aa = a->a; 318 PetscTruth ignorezeroentries = a->ignorezeroentries; 319 Mat B = aij->B; 320 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 321 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 322 MatScalar *ba = b->a; 323 324 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 325 PetscInt nonew = a->nonew; 326 MatScalar *ap1,*ap2; 327 328 PetscFunctionBegin; 329 if (v) PetscValidScalarPointer(v,6); 330 for (i=0; i<m; i++) { 331 if (im[i] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 334 #endif 335 if (im[i] >= rstart && im[i] < rend) { 336 row = im[i] - rstart; 337 lastcol1 = -1; 338 rp1 = aj + ai[row]; 339 ap1 = aa + ai[row]; 340 rmax1 = aimax[row]; 341 nrow1 = ailen[row]; 342 low1 = 0; 343 high1 = nrow1; 344 lastcol2 = -1; 345 rp2 = bj + bi[row]; 346 ap2 = ba + bi[row]; 347 rmax2 = bimax[row]; 348 nrow2 = bilen[row]; 349 low2 = 0; 350 high2 = nrow2; 351 352 for (j=0; j<n; j++) { 353 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 354 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 355 if (in[j] >= cstart && in[j] < cend){ 356 col = in[j] - cstart; 357 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 358 } else if (in[j] < 0) continue; 359 #if defined(PETSC_USE_DEBUG) 360 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 361 #endif 362 else { 363 if (mat->was_assembled) { 364 if (!aij->colmap) { 365 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 366 } 367 #if defined (PETSC_USE_CTABLE) 368 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 369 col--; 370 #else 371 col = aij->colmap[in[j]] - 1; 372 #endif 373 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 374 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 375 col = in[j]; 376 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 377 B = aij->B; 378 b = (Mat_SeqAIJ*)B->data; 379 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 380 rp2 = bj + bi[row]; 381 ap2 = ba + bi[row]; 382 rmax2 = bimax[row]; 383 nrow2 = bilen[row]; 384 low2 = 0; 385 high2 = nrow2; 386 bm = aij->B->rmap->n; 387 ba = b->a; 388 } 389 } else col = in[j]; 390 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 391 } 392 } 393 } else { 394 if (!aij->donotstash) { 395 if (roworiented) { 396 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 397 } else { 398 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 399 } 400 } 401 } 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatGetValues_MPIAIJ" 408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 413 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 414 415 PetscFunctionBegin; 416 for (i=0; i<m; i++) { 417 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 418 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 419 if (idxm[i] >= rstart && idxm[i] < rend) { 420 row = idxm[i] - rstart; 421 for (j=0; j<n; j++) { 422 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 423 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 424 if (idxn[j] >= cstart && idxn[j] < cend){ 425 col = idxn[j] - cstart; 426 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 427 } else { 428 if (!aij->colmap) { 429 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 430 } 431 #if defined (PETSC_USE_CTABLE) 432 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 433 col --; 434 #else 435 col = aij->colmap[idxn[j]] - 1; 436 #endif 437 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 438 else { 439 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 440 } 441 } 442 } 443 } else { 444 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 445 } 446 } 447 PetscFunctionReturn(0); 448 } 449 450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash) { 463 PetscFunctionReturn(0); 464 } 465 466 /* make sure all processors are either in INSERTMODE or ADDMODE */ 467 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 468 if (addv == (ADD_VALUES|INSERT_VALUES)) { 469 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 470 } 471 mat->insertmode = addv; /* in case this processor had no cache */ 472 473 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 474 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 475 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 485 PetscErrorCode ierr; 486 PetscMPIInt n; 487 PetscInt i,j,rstart,ncols,flg; 488 PetscInt *row,*col; 489 PetscTruth other_disassembled; 490 PetscScalar *val; 491 InsertMode addv = mat->insertmode; 492 493 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 494 PetscFunctionBegin; 495 if (!aij->donotstash) { 496 while (1) { 497 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 498 if (!flg) break; 499 500 for (i=0; i<n;) { 501 /* Now identify the consecutive vals belonging to the same row */ 502 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 503 if (j < n) ncols = j-i; 504 else ncols = n-i; 505 /* Now assemble all these values with a single function call */ 506 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 507 i = j; 508 } 509 } 510 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 511 } 512 a->compressedrow.use = PETSC_FALSE; 513 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 514 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 515 516 /* determine if any processor has disassembled, if so we must 517 also disassemble ourselfs, in order that we may reassemble. */ 518 /* 519 if nonzero structure of submatrix B cannot change then we know that 520 no processor disassembled thus we can skip this stuff 521 */ 522 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 523 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 524 if (mat->was_assembled && !other_disassembled) { 525 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 } 527 } 528 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 529 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 530 } 531 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 532 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 533 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 534 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 535 536 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 537 aij->rowvalues = 0; 538 539 /* used by MatAXPY() */ 540 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 541 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 542 543 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 544 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 545 PetscFunctionReturn(0); 546 } 547 548 #undef __FUNCT__ 549 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 550 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 551 { 552 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 553 PetscErrorCode ierr; 554 555 PetscFunctionBegin; 556 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 557 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatZeroRows_MPIAIJ" 563 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 564 { 565 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 566 PetscErrorCode ierr; 567 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 568 PetscInt i,*owners = A->rmap->range; 569 PetscInt *nprocs,j,idx,nsends,row; 570 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 571 PetscInt *rvalues,count,base,slen,*source; 572 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 573 MPI_Comm comm = ((PetscObject)A)->comm; 574 MPI_Request *send_waits,*recv_waits; 575 MPI_Status recv_status,*send_status; 576 #if defined(PETSC_DEBUG) 577 PetscTruth found = PETSC_FALSE; 578 #endif 579 580 PetscFunctionBegin; 581 /* first count number of contributors to each processor */ 582 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 583 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 584 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 585 j = 0; 586 for (i=0; i<N; i++) { 587 if (lastidx > (idx = rows[i])) j = 0; 588 lastidx = idx; 589 for (; j<size; j++) { 590 if (idx >= owners[j] && idx < owners[j+1]) { 591 nprocs[2*j]++; 592 nprocs[2*j+1] = 1; 593 owner[i] = j; 594 #if defined(PETSC_DEBUG) 595 found = PETSC_TRUE; 596 #endif 597 break; 598 } 599 } 600 #if defined(PETSC_DEBUG) 601 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 602 found = PETSC_FALSE; 603 #endif 604 } 605 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 606 607 /* inform other processors of number of messages and max length*/ 608 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 609 610 /* post receives: */ 611 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 612 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 613 for (i=0; i<nrecvs; i++) { 614 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 615 } 616 617 /* do sends: 618 1) starts[i] gives the starting index in svalues for stuff going to 619 the ith processor 620 */ 621 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 622 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 623 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 624 starts[0] = 0; 625 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 626 for (i=0; i<N; i++) { 627 svalues[starts[owner[i]]++] = rows[i]; 628 } 629 630 starts[0] = 0; 631 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 632 count = 0; 633 for (i=0; i<size; i++) { 634 if (nprocs[2*i+1]) { 635 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 636 } 637 } 638 ierr = PetscFree(starts);CHKERRQ(ierr); 639 640 base = owners[rank]; 641 642 /* wait on receives */ 643 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 644 count = nrecvs; slen = 0; 645 while (count) { 646 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 647 /* unpack receives into our local space */ 648 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 649 source[imdex] = recv_status.MPI_SOURCE; 650 lens[imdex] = n; 651 slen += n; 652 count--; 653 } 654 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 655 656 /* move the data into the send scatter */ 657 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 658 count = 0; 659 for (i=0; i<nrecvs; i++) { 660 values = rvalues + i*nmax; 661 for (j=0; j<lens[i]; j++) { 662 lrows[count++] = values[j] - base; 663 } 664 } 665 ierr = PetscFree(rvalues);CHKERRQ(ierr); 666 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 667 ierr = PetscFree(owner);CHKERRQ(ierr); 668 ierr = PetscFree(nprocs);CHKERRQ(ierr); 669 670 /* actually zap the local rows */ 671 /* 672 Zero the required rows. If the "diagonal block" of the matrix 673 is square and the user wishes to set the diagonal we use separate 674 code so that MatSetValues() is not called for each diagonal allocating 675 new memory, thus calling lots of mallocs and slowing things down. 676 677 */ 678 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 679 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 680 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 681 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 682 } else if (diag != 0.0) { 683 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 684 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 685 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 687 } 688 for (i = 0; i < slen; i++) { 689 row = lrows[i] + rstart; 690 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 } else { 695 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 696 } 697 ierr = PetscFree(lrows);CHKERRQ(ierr); 698 699 /* wait on sends */ 700 if (nsends) { 701 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 702 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 703 ierr = PetscFree(send_status);CHKERRQ(ierr); 704 } 705 ierr = PetscFree(send_waits);CHKERRQ(ierr); 706 ierr = PetscFree(svalues);CHKERRQ(ierr); 707 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMult_MPIAIJ" 713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscInt nt; 718 719 PetscFunctionBegin; 720 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 721 if (nt != A->cmap->n) { 722 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 723 } 724 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 726 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 733 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 734 { 735 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatMultAdd_MPIAIJ" 745 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 746 { 747 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 748 PetscErrorCode ierr; 749 750 PetscFunctionBegin; 751 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 752 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 753 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 754 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 760 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 761 { 762 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 763 PetscErrorCode ierr; 764 PetscTruth merged; 765 766 PetscFunctionBegin; 767 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 768 /* do nondiagonal part */ 769 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 770 if (!merged) { 771 /* send it on its way */ 772 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 /* do local part */ 774 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 775 /* receive remote parts: note this assumes the values are not actually */ 776 /* added in yy until the next line, */ 777 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 778 } else { 779 /* do local part */ 780 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 781 /* send it on its way */ 782 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 783 /* values actually were received in the Begin() but we need to call this nop */ 784 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 785 } 786 PetscFunctionReturn(0); 787 } 788 789 EXTERN_C_BEGIN 790 #undef __FUNCT__ 791 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 792 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 793 { 794 MPI_Comm comm; 795 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 796 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 797 IS Me,Notme; 798 PetscErrorCode ierr; 799 PetscInt M,N,first,last,*notme,i; 800 PetscMPIInt size; 801 802 PetscFunctionBegin; 803 804 /* Easy test: symmetric diagonal block */ 805 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 806 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 807 if (!*f) PetscFunctionReturn(0); 808 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 809 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 810 if (size == 1) PetscFunctionReturn(0); 811 812 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 813 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 814 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 815 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 816 for (i=0; i<first; i++) notme[i] = i; 817 for (i=last; i<M; i++) notme[i-last+first] = i; 818 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 819 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 820 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 821 Aoff = Aoffs[0]; 822 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 823 Boff = Boffs[0]; 824 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 825 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 826 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 827 ierr = ISDestroy(Me);CHKERRQ(ierr); 828 ierr = ISDestroy(Notme);CHKERRQ(ierr); 829 830 PetscFunctionReturn(0); 831 } 832 EXTERN_C_END 833 834 #undef __FUNCT__ 835 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 836 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 837 { 838 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 839 PetscErrorCode ierr; 840 841 PetscFunctionBegin; 842 /* do nondiagonal part */ 843 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 844 /* send it on its way */ 845 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 846 /* do local part */ 847 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 848 /* receive remote parts */ 849 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 850 PetscFunctionReturn(0); 851 } 852 853 /* 854 This only works correctly for square matrices where the subblock A->A is the 855 diagonal block 856 */ 857 #undef __FUNCT__ 858 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 859 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 860 { 861 PetscErrorCode ierr; 862 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 863 864 PetscFunctionBegin; 865 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 866 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) { 867 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 868 } 869 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "MatScale_MPIAIJ" 875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 876 { 877 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 878 PetscErrorCode ierr; 879 880 PetscFunctionBegin; 881 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 882 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatDestroy_MPIAIJ" 888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 889 { 890 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 891 PetscErrorCode ierr; 892 893 PetscFunctionBegin; 894 #if defined(PETSC_USE_LOG) 895 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 896 #endif 897 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 898 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 899 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 900 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 901 #if defined (PETSC_USE_CTABLE) 902 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 903 #else 904 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 905 #endif 906 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 907 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 908 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 909 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 910 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 911 ierr = PetscFree(aij);CHKERRQ(ierr); 912 913 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 914 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 915 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 916 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 917 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 918 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 919 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 920 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 921 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 922 PetscFunctionReturn(0); 923 } 924 925 #undef __FUNCT__ 926 #define __FUNCT__ "MatView_MPIAIJ_Binary" 927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 928 { 929 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 930 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 931 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 932 PetscErrorCode ierr; 933 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 934 int fd; 935 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 936 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 937 PetscScalar *column_values; 938 939 PetscFunctionBegin; 940 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 941 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 942 nz = A->nz + B->nz; 943 if (!rank) { 944 header[0] = MAT_FILE_COOKIE; 945 header[1] = mat->rmap->N; 946 header[2] = mat->cmap->N; 947 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 948 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 949 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 950 /* get largest number of rows any processor has */ 951 rlen = mat->rmap->n; 952 range = mat->rmap->range; 953 for (i=1; i<size; i++) { 954 rlen = PetscMax(rlen,range[i+1] - range[i]); 955 } 956 } else { 957 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 958 rlen = mat->rmap->n; 959 } 960 961 /* load up the local row counts */ 962 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 963 for (i=0; i<mat->rmap->n; i++) { 964 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 965 } 966 967 /* store the row lengths to the file */ 968 if (!rank) { 969 MPI_Status status; 970 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 971 for (i=1; i<size; i++) { 972 rlen = range[i+1] - range[i]; 973 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 974 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 975 } 976 } else { 977 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 978 } 979 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 980 981 /* load up the local column indices */ 982 nzmax = nz; /* )th processor needs space a largest processor needs */ 983 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 984 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 985 cnt = 0; 986 for (i=0; i<mat->rmap->n; i++) { 987 for (j=B->i[i]; j<B->i[i+1]; j++) { 988 if ( (col = garray[B->j[j]]) > cstart) break; 989 column_indices[cnt++] = col; 990 } 991 for (k=A->i[i]; k<A->i[i+1]; k++) { 992 column_indices[cnt++] = A->j[k] + cstart; 993 } 994 for (; j<B->i[i+1]; j++) { 995 column_indices[cnt++] = garray[B->j[j]]; 996 } 997 } 998 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 999 1000 /* store the column indices to the file */ 1001 if (!rank) { 1002 MPI_Status status; 1003 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1004 for (i=1; i<size; i++) { 1005 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1006 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1007 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1008 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1009 } 1010 } else { 1011 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1012 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1013 } 1014 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1015 1016 /* load up the local column values */ 1017 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1018 cnt = 0; 1019 for (i=0; i<mat->rmap->n; i++) { 1020 for (j=B->i[i]; j<B->i[i+1]; j++) { 1021 if ( garray[B->j[j]] > cstart) break; 1022 column_values[cnt++] = B->a[j]; 1023 } 1024 for (k=A->i[i]; k<A->i[i+1]; k++) { 1025 column_values[cnt++] = A->a[k]; 1026 } 1027 for (; j<B->i[i+1]; j++) { 1028 column_values[cnt++] = B->a[j]; 1029 } 1030 } 1031 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1032 1033 /* store the column values to the file */ 1034 if (!rank) { 1035 MPI_Status status; 1036 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1037 for (i=1; i<size; i++) { 1038 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1039 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1040 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1041 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1042 } 1043 } else { 1044 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1045 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1046 } 1047 ierr = PetscFree(column_values);CHKERRQ(ierr); 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1053 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1054 { 1055 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1056 PetscErrorCode ierr; 1057 PetscMPIInt rank = aij->rank,size = aij->size; 1058 PetscTruth isdraw,iascii,isbinary; 1059 PetscViewer sviewer; 1060 PetscViewerFormat format; 1061 1062 PetscFunctionBegin; 1063 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1064 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1065 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1066 if (iascii) { 1067 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1068 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1069 MatInfo info; 1070 PetscTruth inodes; 1071 1072 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1073 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1074 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1075 if (!inodes) { 1076 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1077 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1078 } else { 1079 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1080 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1081 } 1082 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1083 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1084 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1085 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1086 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1087 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1088 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1089 PetscFunctionReturn(0); 1090 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1091 PetscInt inodecount,inodelimit,*inodes; 1092 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1093 if (inodes) { 1094 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1095 } else { 1096 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1097 } 1098 PetscFunctionReturn(0); 1099 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1100 PetscFunctionReturn(0); 1101 } 1102 } else if (isbinary) { 1103 if (size == 1) { 1104 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1105 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1106 } else { 1107 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1108 } 1109 PetscFunctionReturn(0); 1110 } else if (isdraw) { 1111 PetscDraw draw; 1112 PetscTruth isnull; 1113 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1114 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1115 } 1116 1117 if (size == 1) { 1118 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1119 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1120 } else { 1121 /* assemble the entire matrix onto first processor. */ 1122 Mat A; 1123 Mat_SeqAIJ *Aloc; 1124 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1125 MatScalar *a; 1126 1127 if (mat->rmap->N > 1024) { 1128 PetscTruth flg = PETSC_FALSE; 1129 1130 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1131 if (!flg) { 1132 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1133 } 1134 } 1135 1136 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1137 if (!rank) { 1138 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1139 } else { 1140 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1141 } 1142 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1143 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1144 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1145 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1146 1147 /* copy over the A part */ 1148 Aloc = (Mat_SeqAIJ*)aij->A->data; 1149 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1150 row = mat->rmap->rstart; 1151 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1152 for (i=0; i<m; i++) { 1153 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1154 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1155 } 1156 aj = Aloc->j; 1157 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1158 1159 /* copy over the B part */ 1160 Aloc = (Mat_SeqAIJ*)aij->B->data; 1161 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1162 row = mat->rmap->rstart; 1163 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1164 ct = cols; 1165 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1166 for (i=0; i<m; i++) { 1167 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1168 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1169 } 1170 ierr = PetscFree(ct);CHKERRQ(ierr); 1171 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1172 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1173 /* 1174 Everyone has to call to draw the matrix since the graphics waits are 1175 synchronized across all processors that share the PetscDraw object 1176 */ 1177 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1178 if (!rank) { 1179 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1180 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1181 } 1182 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1183 ierr = MatDestroy(A);CHKERRQ(ierr); 1184 } 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatView_MPIAIJ" 1190 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1191 { 1192 PetscErrorCode ierr; 1193 PetscTruth iascii,isdraw,issocket,isbinary; 1194 1195 PetscFunctionBegin; 1196 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1197 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1198 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1199 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1200 if (iascii || isdraw || isbinary || issocket) { 1201 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1202 } else { 1203 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1204 } 1205 PetscFunctionReturn(0); 1206 } 1207 1208 #undef __FUNCT__ 1209 #define __FUNCT__ "MatSOR_MPIAIJ" 1210 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1211 { 1212 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1213 PetscErrorCode ierr; 1214 Vec bb1 = 0; 1215 PetscTruth hasop; 1216 1217 PetscFunctionBegin; 1218 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1219 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1220 } 1221 1222 if (flag == SOR_APPLY_UPPER) { 1223 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1228 if (flag & SOR_ZERO_INITIAL_GUESS) { 1229 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1230 its--; 1231 } 1232 1233 while (its--) { 1234 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1235 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1236 1237 /* update rhs: bb1 = bb - B*x */ 1238 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1239 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1240 1241 /* local sweep */ 1242 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1243 } 1244 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1245 if (flag & SOR_ZERO_INITIAL_GUESS) { 1246 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1247 its--; 1248 } 1249 while (its--) { 1250 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1251 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1252 1253 /* update rhs: bb1 = bb - B*x */ 1254 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1255 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1256 1257 /* local sweep */ 1258 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1259 } 1260 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1261 if (flag & SOR_ZERO_INITIAL_GUESS) { 1262 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1263 its--; 1264 } 1265 while (its--) { 1266 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1267 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1268 1269 /* update rhs: bb1 = bb - B*x */ 1270 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1271 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1272 1273 /* local sweep */ 1274 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1275 } 1276 } else if (flag & SOR_EISENSTAT) { 1277 Vec xx1; 1278 1279 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1280 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1281 1282 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1283 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1284 if (!mat->diag) { 1285 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1286 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1287 } 1288 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1289 if (hasop) { 1290 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1291 } else { 1292 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1293 } 1294 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1295 1296 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1297 1298 /* local sweep */ 1299 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1300 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1301 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1302 } else { 1303 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1304 } 1305 1306 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatPermute_MPIAIJ" 1312 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1313 { 1314 MPI_Comm comm,pcomm; 1315 PetscInt first,local_size,nrows; 1316 const PetscInt *rows; 1317 PetscMPIInt size; 1318 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1319 PetscErrorCode ierr; 1320 1321 PetscFunctionBegin; 1322 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1323 /* make a collective version of 'rowp' */ 1324 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1325 if (pcomm==comm) { 1326 crowp = rowp; 1327 } else { 1328 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1329 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1330 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1331 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1332 } 1333 /* collect the global row permutation and invert it */ 1334 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1335 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1336 if (pcomm!=comm) { 1337 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1338 } 1339 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1340 /* get the local target indices */ 1341 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1342 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1343 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1344 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1345 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1346 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1347 /* the column permutation is so much easier; 1348 make a local version of 'colp' and invert it */ 1349 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1350 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1351 if (size==1) { 1352 lcolp = colp; 1353 } else { 1354 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1355 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1356 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1357 } 1358 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1359 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1360 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1361 if (size>1) { 1362 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1363 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1364 } 1365 /* now we just get the submatrix */ 1366 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1367 /* clean up */ 1368 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1369 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1370 PetscFunctionReturn(0); 1371 } 1372 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1375 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1376 { 1377 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1378 Mat A = mat->A,B = mat->B; 1379 PetscErrorCode ierr; 1380 PetscReal isend[5],irecv[5]; 1381 1382 PetscFunctionBegin; 1383 info->block_size = 1.0; 1384 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1385 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1386 isend[3] = info->memory; isend[4] = info->mallocs; 1387 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1388 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1389 isend[3] += info->memory; isend[4] += info->mallocs; 1390 if (flag == MAT_LOCAL) { 1391 info->nz_used = isend[0]; 1392 info->nz_allocated = isend[1]; 1393 info->nz_unneeded = isend[2]; 1394 info->memory = isend[3]; 1395 info->mallocs = isend[4]; 1396 } else if (flag == MAT_GLOBAL_MAX) { 1397 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1398 info->nz_used = irecv[0]; 1399 info->nz_allocated = irecv[1]; 1400 info->nz_unneeded = irecv[2]; 1401 info->memory = irecv[3]; 1402 info->mallocs = irecv[4]; 1403 } else if (flag == MAT_GLOBAL_SUM) { 1404 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1405 info->nz_used = irecv[0]; 1406 info->nz_allocated = irecv[1]; 1407 info->nz_unneeded = irecv[2]; 1408 info->memory = irecv[3]; 1409 info->mallocs = irecv[4]; 1410 } 1411 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1412 info->fill_ratio_needed = 0; 1413 info->factor_mallocs = 0; 1414 1415 PetscFunctionReturn(0); 1416 } 1417 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "MatSetOption_MPIAIJ" 1420 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1421 { 1422 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1423 PetscErrorCode ierr; 1424 1425 PetscFunctionBegin; 1426 switch (op) { 1427 case MAT_NEW_NONZERO_LOCATIONS: 1428 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1429 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1430 case MAT_KEEP_NONZERO_PATTERN: 1431 case MAT_NEW_NONZERO_LOCATION_ERR: 1432 case MAT_USE_INODES: 1433 case MAT_IGNORE_ZERO_ENTRIES: 1434 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1435 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1436 break; 1437 case MAT_ROW_ORIENTED: 1438 a->roworiented = flg; 1439 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1440 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1441 break; 1442 case MAT_NEW_DIAGONALS: 1443 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1444 break; 1445 case MAT_IGNORE_OFF_PROC_ENTRIES: 1446 a->donotstash = PETSC_TRUE; 1447 break; 1448 case MAT_SYMMETRIC: 1449 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1450 break; 1451 case MAT_STRUCTURALLY_SYMMETRIC: 1452 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1453 break; 1454 case MAT_HERMITIAN: 1455 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1456 break; 1457 case MAT_SYMMETRY_ETERNAL: 1458 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1459 break; 1460 default: 1461 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1462 } 1463 PetscFunctionReturn(0); 1464 } 1465 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "MatGetRow_MPIAIJ" 1468 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1469 { 1470 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1471 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1472 PetscErrorCode ierr; 1473 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1474 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1475 PetscInt *cmap,*idx_p; 1476 1477 PetscFunctionBegin; 1478 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1479 mat->getrowactive = PETSC_TRUE; 1480 1481 if (!mat->rowvalues && (idx || v)) { 1482 /* 1483 allocate enough space to hold information from the longest row. 1484 */ 1485 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1486 PetscInt max = 1,tmp; 1487 for (i=0; i<matin->rmap->n; i++) { 1488 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1489 if (max < tmp) { max = tmp; } 1490 } 1491 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1492 } 1493 1494 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1495 lrow = row - rstart; 1496 1497 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1498 if (!v) {pvA = 0; pvB = 0;} 1499 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1500 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1501 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1502 nztot = nzA + nzB; 1503 1504 cmap = mat->garray; 1505 if (v || idx) { 1506 if (nztot) { 1507 /* Sort by increasing column numbers, assuming A and B already sorted */ 1508 PetscInt imark = -1; 1509 if (v) { 1510 *v = v_p = mat->rowvalues; 1511 for (i=0; i<nzB; i++) { 1512 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1513 else break; 1514 } 1515 imark = i; 1516 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1517 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1518 } 1519 if (idx) { 1520 *idx = idx_p = mat->rowindices; 1521 if (imark > -1) { 1522 for (i=0; i<imark; i++) { 1523 idx_p[i] = cmap[cworkB[i]]; 1524 } 1525 } else { 1526 for (i=0; i<nzB; i++) { 1527 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1528 else break; 1529 } 1530 imark = i; 1531 } 1532 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1533 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1534 } 1535 } else { 1536 if (idx) *idx = 0; 1537 if (v) *v = 0; 1538 } 1539 } 1540 *nz = nztot; 1541 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1542 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1543 PetscFunctionReturn(0); 1544 } 1545 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1548 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1549 { 1550 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1551 1552 PetscFunctionBegin; 1553 if (!aij->getrowactive) { 1554 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1555 } 1556 aij->getrowactive = PETSC_FALSE; 1557 PetscFunctionReturn(0); 1558 } 1559 1560 #undef __FUNCT__ 1561 #define __FUNCT__ "MatNorm_MPIAIJ" 1562 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1563 { 1564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1565 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1566 PetscErrorCode ierr; 1567 PetscInt i,j,cstart = mat->cmap->rstart; 1568 PetscReal sum = 0.0; 1569 MatScalar *v; 1570 1571 PetscFunctionBegin; 1572 if (aij->size == 1) { 1573 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1574 } else { 1575 if (type == NORM_FROBENIUS) { 1576 v = amat->a; 1577 for (i=0; i<amat->nz; i++) { 1578 #if defined(PETSC_USE_COMPLEX) 1579 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1580 #else 1581 sum += (*v)*(*v); v++; 1582 #endif 1583 } 1584 v = bmat->a; 1585 for (i=0; i<bmat->nz; i++) { 1586 #if defined(PETSC_USE_COMPLEX) 1587 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1588 #else 1589 sum += (*v)*(*v); v++; 1590 #endif 1591 } 1592 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1593 *norm = sqrt(*norm); 1594 } else if (type == NORM_1) { /* max column norm */ 1595 PetscReal *tmp,*tmp2; 1596 PetscInt *jj,*garray = aij->garray; 1597 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1598 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1599 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1600 *norm = 0.0; 1601 v = amat->a; jj = amat->j; 1602 for (j=0; j<amat->nz; j++) { 1603 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1604 } 1605 v = bmat->a; jj = bmat->j; 1606 for (j=0; j<bmat->nz; j++) { 1607 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1608 } 1609 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1610 for (j=0; j<mat->cmap->N; j++) { 1611 if (tmp2[j] > *norm) *norm = tmp2[j]; 1612 } 1613 ierr = PetscFree(tmp);CHKERRQ(ierr); 1614 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1615 } else if (type == NORM_INFINITY) { /* max row norm */ 1616 PetscReal ntemp = 0.0; 1617 for (j=0; j<aij->A->rmap->n; j++) { 1618 v = amat->a + amat->i[j]; 1619 sum = 0.0; 1620 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1621 sum += PetscAbsScalar(*v); v++; 1622 } 1623 v = bmat->a + bmat->i[j]; 1624 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1625 sum += PetscAbsScalar(*v); v++; 1626 } 1627 if (sum > ntemp) ntemp = sum; 1628 } 1629 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1630 } else { 1631 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1632 } 1633 } 1634 PetscFunctionReturn(0); 1635 } 1636 1637 #undef __FUNCT__ 1638 #define __FUNCT__ "MatTranspose_MPIAIJ" 1639 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1640 { 1641 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1642 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1643 PetscErrorCode ierr; 1644 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1645 PetscInt cstart=A->cmap->rstart,ncol; 1646 Mat B; 1647 MatScalar *array; 1648 1649 PetscFunctionBegin; 1650 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1651 1652 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1653 ai = Aloc->i; aj = Aloc->j; 1654 bi = Bloc->i; bj = Bloc->j; 1655 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1656 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1657 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1658 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1659 for (i=0; i<ai[ma]; i++){ 1660 d_nnz[aj[i]] ++; 1661 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1662 } 1663 1664 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1665 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1666 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1667 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1668 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1669 } else { 1670 B = *matout; 1671 } 1672 1673 /* copy over the A part */ 1674 array = Aloc->a; 1675 row = A->rmap->rstart; 1676 for (i=0; i<ma; i++) { 1677 ncol = ai[i+1]-ai[i]; 1678 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1679 row++; array += ncol; aj += ncol; 1680 } 1681 aj = Aloc->j; 1682 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1683 1684 /* copy over the B part */ 1685 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1686 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1687 array = Bloc->a; 1688 row = A->rmap->rstart; 1689 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1690 cols_tmp = cols; 1691 for (i=0; i<mb; i++) { 1692 ncol = bi[i+1]-bi[i]; 1693 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1694 row++; array += ncol; cols_tmp += ncol; 1695 } 1696 ierr = PetscFree(cols);CHKERRQ(ierr); 1697 1698 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1699 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1701 *matout = B; 1702 } else { 1703 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1704 } 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1710 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1711 { 1712 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1713 Mat a = aij->A,b = aij->B; 1714 PetscErrorCode ierr; 1715 PetscInt s1,s2,s3; 1716 1717 PetscFunctionBegin; 1718 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1719 if (rr) { 1720 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1721 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1722 /* Overlap communication with computation. */ 1723 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1724 } 1725 if (ll) { 1726 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1727 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1728 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1729 } 1730 /* scale the diagonal block */ 1731 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1732 1733 if (rr) { 1734 /* Do a scatter end and then right scale the off-diagonal block */ 1735 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1736 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1737 } 1738 1739 PetscFunctionReturn(0); 1740 } 1741 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1744 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1745 { 1746 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1747 PetscErrorCode ierr; 1748 1749 PetscFunctionBegin; 1750 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1751 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1752 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1753 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1754 PetscFunctionReturn(0); 1755 } 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1758 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1759 { 1760 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1761 PetscErrorCode ierr; 1762 1763 PetscFunctionBegin; 1764 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1765 PetscFunctionReturn(0); 1766 } 1767 1768 #undef __FUNCT__ 1769 #define __FUNCT__ "MatEqual_MPIAIJ" 1770 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1771 { 1772 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1773 Mat a,b,c,d; 1774 PetscTruth flg; 1775 PetscErrorCode ierr; 1776 1777 PetscFunctionBegin; 1778 a = matA->A; b = matA->B; 1779 c = matB->A; d = matB->B; 1780 1781 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1782 if (flg) { 1783 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1784 } 1785 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1786 PetscFunctionReturn(0); 1787 } 1788 1789 #undef __FUNCT__ 1790 #define __FUNCT__ "MatCopy_MPIAIJ" 1791 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1792 { 1793 PetscErrorCode ierr; 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1795 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1796 1797 PetscFunctionBegin; 1798 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1799 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1800 /* because of the column compression in the off-processor part of the matrix a->B, 1801 the number of columns in a->B and b->B may be different, hence we cannot call 1802 the MatCopy() directly on the two parts. If need be, we can provide a more 1803 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1804 then copying the submatrices */ 1805 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1806 } else { 1807 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1808 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1809 } 1810 PetscFunctionReturn(0); 1811 } 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1815 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1816 { 1817 PetscErrorCode ierr; 1818 1819 PetscFunctionBegin; 1820 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 #include "petscblaslapack.h" 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatAXPY_MPIAIJ" 1827 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1828 { 1829 PetscErrorCode ierr; 1830 PetscInt i; 1831 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1832 PetscBLASInt bnz,one=1; 1833 Mat_SeqAIJ *x,*y; 1834 1835 PetscFunctionBegin; 1836 if (str == SAME_NONZERO_PATTERN) { 1837 PetscScalar alpha = a; 1838 x = (Mat_SeqAIJ *)xx->A->data; 1839 y = (Mat_SeqAIJ *)yy->A->data; 1840 bnz = PetscBLASIntCast(x->nz); 1841 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1842 x = (Mat_SeqAIJ *)xx->B->data; 1843 y = (Mat_SeqAIJ *)yy->B->data; 1844 bnz = PetscBLASIntCast(x->nz); 1845 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1846 } else if (str == SUBSET_NONZERO_PATTERN) { 1847 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1848 1849 x = (Mat_SeqAIJ *)xx->B->data; 1850 y = (Mat_SeqAIJ *)yy->B->data; 1851 if (y->xtoy && y->XtoY != xx->B) { 1852 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1853 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1854 } 1855 if (!y->xtoy) { /* get xtoy */ 1856 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1857 y->XtoY = xx->B; 1858 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1859 } 1860 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1861 } else { 1862 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1863 } 1864 PetscFunctionReturn(0); 1865 } 1866 1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatConjugate_MPIAIJ" 1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1872 { 1873 #if defined(PETSC_USE_COMPLEX) 1874 PetscErrorCode ierr; 1875 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1876 1877 PetscFunctionBegin; 1878 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1879 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1880 #else 1881 PetscFunctionBegin; 1882 #endif 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatRealPart_MPIAIJ" 1888 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1889 { 1890 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1891 PetscErrorCode ierr; 1892 1893 PetscFunctionBegin; 1894 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1895 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1901 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1902 { 1903 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1904 PetscErrorCode ierr; 1905 1906 PetscFunctionBegin; 1907 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1908 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 #ifdef PETSC_HAVE_PBGL 1913 1914 #include <boost/parallel/mpi/bsp_process_group.hpp> 1915 #include <boost/graph/distributed/ilu_default_graph.hpp> 1916 #include <boost/graph/distributed/ilu_0_block.hpp> 1917 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1918 #include <boost/graph/distributed/petsc/interface.hpp> 1919 #include <boost/multi_array.hpp> 1920 #include <boost/parallel/distributed_property_map->hpp> 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1924 /* 1925 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1926 */ 1927 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1928 { 1929 namespace petsc = boost::distributed::petsc; 1930 1931 namespace graph_dist = boost::graph::distributed; 1932 using boost::graph::distributed::ilu_default::process_group_type; 1933 using boost::graph::ilu_permuted; 1934 1935 PetscTruth row_identity, col_identity; 1936 PetscContainer c; 1937 PetscInt m, n, M, N; 1938 PetscErrorCode ierr; 1939 1940 PetscFunctionBegin; 1941 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1942 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1943 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1944 if (!row_identity || !col_identity) { 1945 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1946 } 1947 1948 process_group_type pg; 1949 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1950 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1951 lgraph_type& level_graph = *lgraph_p; 1952 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1953 1954 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1955 ilu_permuted(level_graph); 1956 1957 /* put together the new matrix */ 1958 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1959 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1960 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1961 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1962 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1963 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1964 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1965 1966 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1967 ierr = PetscContainerSetPointer(c, lgraph_p); 1968 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1974 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1975 { 1976 PetscFunctionBegin; 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #undef __FUNCT__ 1981 #define __FUNCT__ "MatSolve_MPIAIJ" 1982 /* 1983 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1984 */ 1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1986 { 1987 namespace graph_dist = boost::graph::distributed; 1988 1989 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1990 lgraph_type* lgraph_p; 1991 PetscContainer c; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1996 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1997 ierr = VecCopy(b, x);CHKERRQ(ierr); 1998 1999 PetscScalar* array_x; 2000 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2001 PetscInt sx; 2002 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2003 2004 PetscScalar* array_b; 2005 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2006 PetscInt sb; 2007 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2008 2009 lgraph_type& level_graph = *lgraph_p; 2010 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2011 2012 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2013 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2014 ref_x(array_x, boost::extents[num_vertices(graph)]); 2015 2016 typedef boost::iterator_property_map<array_ref_type::iterator, 2017 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2018 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2019 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2020 2021 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2022 2023 PetscFunctionReturn(0); 2024 } 2025 #endif 2026 2027 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2028 PetscInt nzlocal,nsends,nrecvs; 2029 PetscMPIInt *send_rank,*recv_rank; 2030 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2031 PetscScalar *sbuf_a,**rbuf_a; 2032 PetscErrorCode (*MatDestroy)(Mat); 2033 } Mat_Redundant; 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2038 { 2039 PetscErrorCode ierr; 2040 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2041 PetscInt i; 2042 2043 PetscFunctionBegin; 2044 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2045 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2046 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2047 for (i=0; i<redund->nrecvs; i++){ 2048 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2049 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2050 } 2051 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2052 ierr = PetscFree(redund);CHKERRQ(ierr); 2053 PetscFunctionReturn(0); 2054 } 2055 2056 #undef __FUNCT__ 2057 #define __FUNCT__ "MatDestroy_MatRedundant" 2058 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2059 { 2060 PetscErrorCode ierr; 2061 PetscContainer container; 2062 Mat_Redundant *redund=PETSC_NULL; 2063 2064 PetscFunctionBegin; 2065 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2066 if (container) { 2067 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2068 } else { 2069 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2070 } 2071 A->ops->destroy = redund->MatDestroy; 2072 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2073 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2074 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2075 PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2081 { 2082 PetscMPIInt rank,size; 2083 MPI_Comm comm=((PetscObject)mat)->comm; 2084 PetscErrorCode ierr; 2085 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2086 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2087 PetscInt *rowrange=mat->rmap->range; 2088 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2089 Mat A=aij->A,B=aij->B,C=*matredundant; 2090 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2091 PetscScalar *sbuf_a; 2092 PetscInt nzlocal=a->nz+b->nz; 2093 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2094 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2095 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2096 MatScalar *aworkA,*aworkB; 2097 PetscScalar *vals; 2098 PetscMPIInt tag1,tag2,tag3,imdex; 2099 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2100 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2101 MPI_Status recv_status,*send_status; 2102 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2103 PetscInt **rbuf_j=PETSC_NULL; 2104 PetscScalar **rbuf_a=PETSC_NULL; 2105 Mat_Redundant *redund=PETSC_NULL; 2106 PetscContainer container; 2107 2108 PetscFunctionBegin; 2109 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2110 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2111 2112 if (reuse == MAT_REUSE_MATRIX) { 2113 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2114 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2115 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2116 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2117 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2118 if (container) { 2119 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2120 } else { 2121 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2122 } 2123 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2124 2125 nsends = redund->nsends; 2126 nrecvs = redund->nrecvs; 2127 send_rank = redund->send_rank; 2128 recv_rank = redund->recv_rank; 2129 sbuf_nz = redund->sbuf_nz; 2130 rbuf_nz = redund->rbuf_nz; 2131 sbuf_j = redund->sbuf_j; 2132 sbuf_a = redund->sbuf_a; 2133 rbuf_j = redund->rbuf_j; 2134 rbuf_a = redund->rbuf_a; 2135 } 2136 2137 if (reuse == MAT_INITIAL_MATRIX){ 2138 PetscMPIInt subrank,subsize; 2139 PetscInt nleftover,np_subcomm; 2140 /* get the destination processors' id send_rank, nsends and nrecvs */ 2141 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2142 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2143 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2144 np_subcomm = size/nsubcomm; 2145 nleftover = size - nsubcomm*np_subcomm; 2146 nsends = 0; nrecvs = 0; 2147 for (i=0; i<size; i++){ /* i=rank*/ 2148 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2149 send_rank[nsends] = i; nsends++; 2150 recv_rank[nrecvs++] = i; 2151 } 2152 } 2153 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2154 i = size-nleftover-1; 2155 j = 0; 2156 while (j < nsubcomm - nleftover){ 2157 send_rank[nsends++] = i; 2158 i--; j++; 2159 } 2160 } 2161 2162 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2163 for (i=0; i<nleftover; i++){ 2164 recv_rank[nrecvs++] = size-nleftover+i; 2165 } 2166 } 2167 2168 /* allocate sbuf_j, sbuf_a */ 2169 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2170 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2171 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2172 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2173 2174 /* copy mat's local entries into the buffers */ 2175 if (reuse == MAT_INITIAL_MATRIX){ 2176 rownz_max = 0; 2177 rptr = sbuf_j; 2178 cols = sbuf_j + rend-rstart + 1; 2179 vals = sbuf_a; 2180 rptr[0] = 0; 2181 for (i=0; i<rend-rstart; i++){ 2182 row = i + rstart; 2183 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2184 ncols = nzA + nzB; 2185 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2186 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2187 /* load the column indices for this row into cols */ 2188 lwrite = 0; 2189 for (l=0; l<nzB; l++) { 2190 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2191 vals[lwrite] = aworkB[l]; 2192 cols[lwrite++] = ctmp; 2193 } 2194 } 2195 for (l=0; l<nzA; l++){ 2196 vals[lwrite] = aworkA[l]; 2197 cols[lwrite++] = cstart + cworkA[l]; 2198 } 2199 for (l=0; l<nzB; l++) { 2200 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2201 vals[lwrite] = aworkB[l]; 2202 cols[lwrite++] = ctmp; 2203 } 2204 } 2205 vals += ncols; 2206 cols += ncols; 2207 rptr[i+1] = rptr[i] + ncols; 2208 if (rownz_max < ncols) rownz_max = ncols; 2209 } 2210 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2211 } else { /* only copy matrix values into sbuf_a */ 2212 rptr = sbuf_j; 2213 vals = sbuf_a; 2214 rptr[0] = 0; 2215 for (i=0; i<rend-rstart; i++){ 2216 row = i + rstart; 2217 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2218 ncols = nzA + nzB; 2219 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2220 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2221 lwrite = 0; 2222 for (l=0; l<nzB; l++) { 2223 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2224 } 2225 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2226 for (l=0; l<nzB; l++) { 2227 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2228 } 2229 vals += ncols; 2230 rptr[i+1] = rptr[i] + ncols; 2231 } 2232 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2233 2234 /* send nzlocal to others, and recv other's nzlocal */ 2235 /*--------------------------------------------------*/ 2236 if (reuse == MAT_INITIAL_MATRIX){ 2237 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2238 s_waits2 = s_waits3 + nsends; 2239 s_waits1 = s_waits2 + nsends; 2240 r_waits1 = s_waits1 + nsends; 2241 r_waits2 = r_waits1 + nrecvs; 2242 r_waits3 = r_waits2 + nrecvs; 2243 } else { 2244 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2245 r_waits3 = s_waits3 + nsends; 2246 } 2247 2248 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2249 if (reuse == MAT_INITIAL_MATRIX){ 2250 /* get new tags to keep the communication clean */ 2251 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2252 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2253 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2254 2255 /* post receives of other's nzlocal */ 2256 for (i=0; i<nrecvs; i++){ 2257 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2258 } 2259 /* send nzlocal to others */ 2260 for (i=0; i<nsends; i++){ 2261 sbuf_nz[i] = nzlocal; 2262 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2263 } 2264 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2265 count = nrecvs; 2266 while (count) { 2267 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2268 recv_rank[imdex] = recv_status.MPI_SOURCE; 2269 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2270 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2271 2272 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2273 rbuf_nz[imdex] += i + 2; 2274 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2275 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2276 count--; 2277 } 2278 /* wait on sends of nzlocal */ 2279 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2280 /* send mat->i,j to others, and recv from other's */ 2281 /*------------------------------------------------*/ 2282 for (i=0; i<nsends; i++){ 2283 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2284 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2285 } 2286 /* wait on receives of mat->i,j */ 2287 /*------------------------------*/ 2288 count = nrecvs; 2289 while (count) { 2290 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2291 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2292 count--; 2293 } 2294 /* wait on sends of mat->i,j */ 2295 /*---------------------------*/ 2296 if (nsends) { 2297 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2298 } 2299 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2300 2301 /* post receives, send and receive mat->a */ 2302 /*----------------------------------------*/ 2303 for (imdex=0; imdex<nrecvs; imdex++) { 2304 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2305 } 2306 for (i=0; i<nsends; i++){ 2307 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2308 } 2309 count = nrecvs; 2310 while (count) { 2311 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2312 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2313 count--; 2314 } 2315 if (nsends) { 2316 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2317 } 2318 2319 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2320 2321 /* create redundant matrix */ 2322 /*-------------------------*/ 2323 if (reuse == MAT_INITIAL_MATRIX){ 2324 /* compute rownz_max for preallocation */ 2325 for (imdex=0; imdex<nrecvs; imdex++){ 2326 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2327 rptr = rbuf_j[imdex]; 2328 for (i=0; i<j; i++){ 2329 ncols = rptr[i+1] - rptr[i]; 2330 if (rownz_max < ncols) rownz_max = ncols; 2331 } 2332 } 2333 2334 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2335 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2336 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2337 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2338 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2339 } else { 2340 C = *matredundant; 2341 } 2342 2343 /* insert local matrix entries */ 2344 rptr = sbuf_j; 2345 cols = sbuf_j + rend-rstart + 1; 2346 vals = sbuf_a; 2347 for (i=0; i<rend-rstart; i++){ 2348 row = i + rstart; 2349 ncols = rptr[i+1] - rptr[i]; 2350 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2351 vals += ncols; 2352 cols += ncols; 2353 } 2354 /* insert received matrix entries */ 2355 for (imdex=0; imdex<nrecvs; imdex++){ 2356 rstart = rowrange[recv_rank[imdex]]; 2357 rend = rowrange[recv_rank[imdex]+1]; 2358 rptr = rbuf_j[imdex]; 2359 cols = rbuf_j[imdex] + rend-rstart + 1; 2360 vals = rbuf_a[imdex]; 2361 for (i=0; i<rend-rstart; i++){ 2362 row = i + rstart; 2363 ncols = rptr[i+1] - rptr[i]; 2364 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2365 vals += ncols; 2366 cols += ncols; 2367 } 2368 } 2369 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2370 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2371 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2372 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2373 if (reuse == MAT_INITIAL_MATRIX){ 2374 PetscContainer container; 2375 *matredundant = C; 2376 /* create a supporting struct and attach it to C for reuse */ 2377 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2378 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2379 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2380 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2381 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2382 2383 redund->nzlocal = nzlocal; 2384 redund->nsends = nsends; 2385 redund->nrecvs = nrecvs; 2386 redund->send_rank = send_rank; 2387 redund->recv_rank = recv_rank; 2388 redund->sbuf_nz = sbuf_nz; 2389 redund->rbuf_nz = rbuf_nz; 2390 redund->sbuf_j = sbuf_j; 2391 redund->sbuf_a = sbuf_a; 2392 redund->rbuf_j = rbuf_j; 2393 redund->rbuf_a = rbuf_a; 2394 2395 redund->MatDestroy = C->ops->destroy; 2396 C->ops->destroy = MatDestroy_MatRedundant; 2397 } 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2404 { 2405 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2406 PetscErrorCode ierr; 2407 PetscInt i,*idxb = 0; 2408 PetscScalar *va,*vb; 2409 Vec vtmp; 2410 2411 PetscFunctionBegin; 2412 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2413 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2414 if (idx) { 2415 for (i=0; i<A->rmap->n; i++) { 2416 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2417 } 2418 } 2419 2420 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2421 if (idx) { 2422 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2423 } 2424 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2425 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2426 2427 for (i=0; i<A->rmap->n; i++){ 2428 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2429 va[i] = vb[i]; 2430 if (idx) idx[i] = a->garray[idxb[i]]; 2431 } 2432 } 2433 2434 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2435 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2436 if (idxb) { 2437 ierr = PetscFree(idxb);CHKERRQ(ierr); 2438 } 2439 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 #undef __FUNCT__ 2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2446 { 2447 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2448 PetscErrorCode ierr; 2449 PetscInt i,*idxb = 0; 2450 PetscScalar *va,*vb; 2451 Vec vtmp; 2452 2453 PetscFunctionBegin; 2454 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2455 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2456 if (idx) { 2457 for (i=0; i<A->cmap->n; i++) { 2458 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2459 } 2460 } 2461 2462 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2463 if (idx) { 2464 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2465 } 2466 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2467 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2468 2469 for (i=0; i<A->rmap->n; i++){ 2470 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2471 va[i] = vb[i]; 2472 if (idx) idx[i] = a->garray[idxb[i]]; 2473 } 2474 } 2475 2476 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2477 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2478 if (idxb) { 2479 ierr = PetscFree(idxb);CHKERRQ(ierr); 2480 } 2481 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2488 { 2489 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2490 PetscInt n = A->rmap->n; 2491 PetscInt cstart = A->cmap->rstart; 2492 PetscInt *cmap = mat->garray; 2493 PetscInt *diagIdx, *offdiagIdx; 2494 Vec diagV, offdiagV; 2495 PetscScalar *a, *diagA, *offdiagA; 2496 PetscInt r; 2497 PetscErrorCode ierr; 2498 2499 PetscFunctionBegin; 2500 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2501 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2502 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2503 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2504 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2505 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2506 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2507 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2508 for(r = 0; r < n; ++r) { 2509 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2510 a[r] = diagA[r]; 2511 idx[r] = cstart + diagIdx[r]; 2512 } else { 2513 a[r] = offdiagA[r]; 2514 idx[r] = cmap[offdiagIdx[r]]; 2515 } 2516 } 2517 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2518 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2519 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2520 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2521 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2522 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2529 { 2530 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2531 PetscInt n = A->rmap->n; 2532 PetscInt cstart = A->cmap->rstart; 2533 PetscInt *cmap = mat->garray; 2534 PetscInt *diagIdx, *offdiagIdx; 2535 Vec diagV, offdiagV; 2536 PetscScalar *a, *diagA, *offdiagA; 2537 PetscInt r; 2538 PetscErrorCode ierr; 2539 2540 PetscFunctionBegin; 2541 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2542 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2543 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2544 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2545 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2546 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2547 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2548 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2549 for(r = 0; r < n; ++r) { 2550 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2551 a[r] = diagA[r]; 2552 idx[r] = cstart + diagIdx[r]; 2553 } else { 2554 a[r] = offdiagA[r]; 2555 idx[r] = cmap[offdiagIdx[r]]; 2556 } 2557 } 2558 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2559 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2560 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2561 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2562 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2563 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2564 PetscFunctionReturn(0); 2565 } 2566 2567 #undef __FUNCT__ 2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2570 { 2571 PetscErrorCode ierr; 2572 Mat *dummy; 2573 2574 PetscFunctionBegin; 2575 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2576 *newmat = *dummy; 2577 ierr = PetscFree(dummy);CHKERRQ(ierr); 2578 PetscFunctionReturn(0); 2579 } 2580 2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2582 /* -------------------------------------------------------------------*/ 2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2584 MatGetRow_MPIAIJ, 2585 MatRestoreRow_MPIAIJ, 2586 MatMult_MPIAIJ, 2587 /* 4*/ MatMultAdd_MPIAIJ, 2588 MatMultTranspose_MPIAIJ, 2589 MatMultTransposeAdd_MPIAIJ, 2590 #ifdef PETSC_HAVE_PBGL 2591 MatSolve_MPIAIJ, 2592 #else 2593 0, 2594 #endif 2595 0, 2596 0, 2597 /*10*/ 0, 2598 0, 2599 0, 2600 MatSOR_MPIAIJ, 2601 MatTranspose_MPIAIJ, 2602 /*15*/ MatGetInfo_MPIAIJ, 2603 MatEqual_MPIAIJ, 2604 MatGetDiagonal_MPIAIJ, 2605 MatDiagonalScale_MPIAIJ, 2606 MatNorm_MPIAIJ, 2607 /*20*/ MatAssemblyBegin_MPIAIJ, 2608 MatAssemblyEnd_MPIAIJ, 2609 MatSetOption_MPIAIJ, 2610 MatZeroEntries_MPIAIJ, 2611 /*24*/ MatZeroRows_MPIAIJ, 2612 0, 2613 #ifdef PETSC_HAVE_PBGL 2614 0, 2615 #else 2616 0, 2617 #endif 2618 0, 2619 0, 2620 /*29*/ MatSetUpPreallocation_MPIAIJ, 2621 #ifdef PETSC_HAVE_PBGL 2622 0, 2623 #else 2624 0, 2625 #endif 2626 0, 2627 0, 2628 0, 2629 /*34*/ MatDuplicate_MPIAIJ, 2630 0, 2631 0, 2632 0, 2633 0, 2634 /*39*/ MatAXPY_MPIAIJ, 2635 MatGetSubMatrices_MPIAIJ, 2636 MatIncreaseOverlap_MPIAIJ, 2637 MatGetValues_MPIAIJ, 2638 MatCopy_MPIAIJ, 2639 /*44*/ MatGetRowMax_MPIAIJ, 2640 MatScale_MPIAIJ, 2641 0, 2642 0, 2643 0, 2644 /*49*/ MatSetBlockSize_MPIAIJ, 2645 0, 2646 0, 2647 0, 2648 0, 2649 /*54*/ MatFDColoringCreate_MPIAIJ, 2650 0, 2651 MatSetUnfactored_MPIAIJ, 2652 MatPermute_MPIAIJ, 2653 0, 2654 /*59*/ MatGetSubMatrix_MPIAIJ, 2655 MatDestroy_MPIAIJ, 2656 MatView_MPIAIJ, 2657 0, 2658 0, 2659 /*64*/ 0, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2665 MatGetRowMinAbs_MPIAIJ, 2666 0, 2667 MatSetColoring_MPIAIJ, 2668 #if defined(PETSC_HAVE_ADIC) 2669 MatSetValuesAdic_MPIAIJ, 2670 #else 2671 0, 2672 #endif 2673 MatSetValuesAdifor_MPIAIJ, 2674 /*75*/ MatFDColoringApply_AIJ, 2675 0, 2676 0, 2677 0, 2678 0, 2679 /*80*/ 0, 2680 0, 2681 0, 2682 /*83*/ MatLoad_MPIAIJ, 2683 0, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2689 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2690 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2691 MatPtAP_Basic, 2692 MatPtAPSymbolic_MPIAIJ, 2693 /*94*/ MatPtAPNumeric_MPIAIJ, 2694 0, 2695 0, 2696 0, 2697 0, 2698 /*99*/ 0, 2699 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2700 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2701 MatConjugate_MPIAIJ, 2702 0, 2703 /*104*/MatSetValuesRow_MPIAIJ, 2704 MatRealPart_MPIAIJ, 2705 MatImaginaryPart_MPIAIJ, 2706 0, 2707 0, 2708 /*109*/0, 2709 MatGetRedundantMatrix_MPIAIJ, 2710 MatGetRowMin_MPIAIJ, 2711 0, 2712 0, 2713 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2714 0, 2715 0, 2716 0, 2717 0, 2718 0 2719 }; 2720 2721 /* ----------------------------------------------------------------------------------------*/ 2722 2723 EXTERN_C_BEGIN 2724 #undef __FUNCT__ 2725 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2726 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2727 { 2728 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2729 PetscErrorCode ierr; 2730 2731 PetscFunctionBegin; 2732 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2733 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2734 PetscFunctionReturn(0); 2735 } 2736 EXTERN_C_END 2737 2738 EXTERN_C_BEGIN 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2741 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2742 { 2743 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2744 PetscErrorCode ierr; 2745 2746 PetscFunctionBegin; 2747 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2748 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2749 PetscFunctionReturn(0); 2750 } 2751 EXTERN_C_END 2752 2753 #include "petscpc.h" 2754 EXTERN_C_BEGIN 2755 #undef __FUNCT__ 2756 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2757 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2758 { 2759 Mat_MPIAIJ *b; 2760 PetscErrorCode ierr; 2761 PetscInt i; 2762 2763 PetscFunctionBegin; 2764 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2765 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2766 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2767 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2768 2769 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2770 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2771 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2772 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2773 if (d_nnz) { 2774 for (i=0; i<B->rmap->n; i++) { 2775 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2776 } 2777 } 2778 if (o_nnz) { 2779 for (i=0; i<B->rmap->n; i++) { 2780 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2781 } 2782 } 2783 b = (Mat_MPIAIJ*)B->data; 2784 2785 if (!B->preallocated) { 2786 /* Explicitly create 2 MATSEQAIJ matrices. */ 2787 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2788 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2789 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2790 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2791 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2792 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2793 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2794 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2795 } 2796 2797 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2798 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2799 B->preallocated = PETSC_TRUE; 2800 PetscFunctionReturn(0); 2801 } 2802 EXTERN_C_END 2803 2804 #undef __FUNCT__ 2805 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2806 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2807 { 2808 Mat mat; 2809 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2810 PetscErrorCode ierr; 2811 2812 PetscFunctionBegin; 2813 *newmat = 0; 2814 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2815 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2816 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2817 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2818 a = (Mat_MPIAIJ*)mat->data; 2819 2820 mat->factor = matin->factor; 2821 mat->rmap->bs = matin->rmap->bs; 2822 mat->assembled = PETSC_TRUE; 2823 mat->insertmode = NOT_SET_VALUES; 2824 mat->preallocated = PETSC_TRUE; 2825 2826 a->size = oldmat->size; 2827 a->rank = oldmat->rank; 2828 a->donotstash = oldmat->donotstash; 2829 a->roworiented = oldmat->roworiented; 2830 a->rowindices = 0; 2831 a->rowvalues = 0; 2832 a->getrowactive = PETSC_FALSE; 2833 2834 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2835 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2836 2837 if (oldmat->colmap) { 2838 #if defined (PETSC_USE_CTABLE) 2839 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2840 #else 2841 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2842 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2843 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2844 #endif 2845 } else a->colmap = 0; 2846 if (oldmat->garray) { 2847 PetscInt len; 2848 len = oldmat->B->cmap->n; 2849 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2850 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2851 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2852 } else a->garray = 0; 2853 2854 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2855 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2856 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2857 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2858 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2859 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2860 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2861 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2862 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2863 *newmat = mat; 2864 PetscFunctionReturn(0); 2865 } 2866 2867 #include "petscsys.h" 2868 2869 #undef __FUNCT__ 2870 #define __FUNCT__ "MatLoad_MPIAIJ" 2871 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2872 { 2873 Mat A; 2874 PetscScalar *vals,*svals; 2875 MPI_Comm comm = ((PetscObject)viewer)->comm; 2876 MPI_Status status; 2877 PetscErrorCode ierr; 2878 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2879 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2880 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2881 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2882 PetscInt cend,cstart,n,*rowners; 2883 int fd; 2884 2885 PetscFunctionBegin; 2886 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2887 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2888 if (!rank) { 2889 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2890 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2891 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2892 } 2893 2894 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2895 M = header[1]; N = header[2]; 2896 /* determine ownership of all rows */ 2897 m = M/size + ((M % size) > rank); 2898 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2899 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2900 2901 /* First process needs enough room for process with most rows */ 2902 if (!rank) { 2903 mmax = rowners[1]; 2904 for (i=2; i<size; i++) { 2905 mmax = PetscMax(mmax,rowners[i]); 2906 } 2907 } else mmax = m; 2908 2909 rowners[0] = 0; 2910 for (i=2; i<=size; i++) { 2911 rowners[i] += rowners[i-1]; 2912 } 2913 rstart = rowners[rank]; 2914 rend = rowners[rank+1]; 2915 2916 /* distribute row lengths to all processors */ 2917 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2918 if (!rank) { 2919 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2920 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2921 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2922 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2923 for (j=0; j<m; j++) { 2924 procsnz[0] += ourlens[j]; 2925 } 2926 for (i=1; i<size; i++) { 2927 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2928 /* calculate the number of nonzeros on each processor */ 2929 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2930 procsnz[i] += rowlengths[j]; 2931 } 2932 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2933 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2934 } 2935 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2936 } else { 2937 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2938 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2939 } 2940 2941 if (!rank) { 2942 /* determine max buffer needed and allocate it */ 2943 maxnz = 0; 2944 for (i=0; i<size; i++) { 2945 maxnz = PetscMax(maxnz,procsnz[i]); 2946 } 2947 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2948 2949 /* read in my part of the matrix column indices */ 2950 nz = procsnz[0]; 2951 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2952 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2953 2954 /* read in every one elses and ship off */ 2955 for (i=1; i<size; i++) { 2956 nz = procsnz[i]; 2957 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2958 mpicnt = PetscMPIIntCast(nz); 2959 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2960 } 2961 ierr = PetscFree(cols);CHKERRQ(ierr); 2962 } else { 2963 /* determine buffer space needed for message */ 2964 nz = 0; 2965 for (i=0; i<m; i++) { 2966 nz += ourlens[i]; 2967 } 2968 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2969 2970 /* receive message of column indices*/ 2971 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2972 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2973 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2974 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2975 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2976 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2977 } 2978 2979 /* determine column ownership if matrix is not square */ 2980 if (N != M) { 2981 n = N/size + ((N % size) > rank); 2982 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2983 cstart = cend - n; 2984 } else { 2985 cstart = rstart; 2986 cend = rend; 2987 n = cend - cstart; 2988 } 2989 2990 /* loop over local rows, determining number of off diagonal entries */ 2991 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2992 jj = 0; 2993 for (i=0; i<m; i++) { 2994 for (j=0; j<ourlens[i]; j++) { 2995 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2996 jj++; 2997 } 2998 } 2999 3000 /* create our matrix */ 3001 for (i=0; i<m; i++) { 3002 ourlens[i] -= offlens[i]; 3003 } 3004 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 3005 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 3006 ierr = MatSetType(A,type);CHKERRQ(ierr); 3007 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 3008 3009 for (i=0; i<m; i++) { 3010 ourlens[i] += offlens[i]; 3011 } 3012 3013 if (!rank) { 3014 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3015 3016 /* read in my part of the matrix numerical values */ 3017 nz = procsnz[0]; 3018 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3019 3020 /* insert into matrix */ 3021 jj = rstart; 3022 smycols = mycols; 3023 svals = vals; 3024 for (i=0; i<m; i++) { 3025 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3026 smycols += ourlens[i]; 3027 svals += ourlens[i]; 3028 jj++; 3029 } 3030 3031 /* read in other processors and ship out */ 3032 for (i=1; i<size; i++) { 3033 nz = procsnz[i]; 3034 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3035 mpicnt = PetscMPIIntCast(nz); 3036 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3037 } 3038 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3039 } else { 3040 /* receive numeric values */ 3041 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3042 3043 /* receive message of values*/ 3044 mpicnt = PetscMPIIntCast(nz); 3045 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3046 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3047 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 3048 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 3049 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3050 3051 /* insert into matrix */ 3052 jj = rstart; 3053 smycols = mycols; 3054 svals = vals; 3055 for (i=0; i<m; i++) { 3056 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3057 smycols += ourlens[i]; 3058 svals += ourlens[i]; 3059 jj++; 3060 } 3061 } 3062 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3063 ierr = PetscFree(vals);CHKERRQ(ierr); 3064 ierr = PetscFree(mycols);CHKERRQ(ierr); 3065 ierr = PetscFree(rowners);CHKERRQ(ierr); 3066 3067 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3068 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3069 *newmat = A; 3070 PetscFunctionReturn(0); 3071 } 3072 3073 #undef __FUNCT__ 3074 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3075 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3076 { 3077 PetscErrorCode ierr; 3078 IS iscol_local; 3079 PetscInt csize; 3080 3081 PetscFunctionBegin; 3082 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3083 if (call == MAT_REUSE_MATRIX) { 3084 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3085 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3086 } else { 3087 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3088 } 3089 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3090 if (call == MAT_INITIAL_MATRIX) { 3091 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3092 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3093 } 3094 PetscFunctionReturn(0); 3095 } 3096 3097 #undef __FUNCT__ 3098 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3099 /* 3100 Not great since it makes two copies of the submatrix, first an SeqAIJ 3101 in local and then by concatenating the local matrices the end result. 3102 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3103 3104 Note: This requires a sequential iscol with all indices. 3105 */ 3106 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3107 { 3108 PetscErrorCode ierr; 3109 PetscMPIInt rank,size; 3110 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3111 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3112 Mat *local,M,Mreuse; 3113 MatScalar *vwork,*aa; 3114 MPI_Comm comm = ((PetscObject)mat)->comm; 3115 Mat_SeqAIJ *aij; 3116 3117 3118 PetscFunctionBegin; 3119 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3120 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3121 3122 if (call == MAT_REUSE_MATRIX) { 3123 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3124 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3125 local = &Mreuse; 3126 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3127 } else { 3128 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3129 Mreuse = *local; 3130 ierr = PetscFree(local);CHKERRQ(ierr); 3131 } 3132 3133 /* 3134 m - number of local rows 3135 n - number of columns (same on all processors) 3136 rstart - first row in new global matrix generated 3137 */ 3138 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3139 if (call == MAT_INITIAL_MATRIX) { 3140 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3141 ii = aij->i; 3142 jj = aij->j; 3143 3144 /* 3145 Determine the number of non-zeros in the diagonal and off-diagonal 3146 portions of the matrix in order to do correct preallocation 3147 */ 3148 3149 /* first get start and end of "diagonal" columns */ 3150 if (csize == PETSC_DECIDE) { 3151 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3152 if (mglobal == n) { /* square matrix */ 3153 nlocal = m; 3154 } else { 3155 nlocal = n/size + ((n % size) > rank); 3156 } 3157 } else { 3158 nlocal = csize; 3159 } 3160 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3161 rstart = rend - nlocal; 3162 if (rank == size - 1 && rend != n) { 3163 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3164 } 3165 3166 /* next, compute all the lengths */ 3167 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3168 olens = dlens + m; 3169 for (i=0; i<m; i++) { 3170 jend = ii[i+1] - ii[i]; 3171 olen = 0; 3172 dlen = 0; 3173 for (j=0; j<jend; j++) { 3174 if (*jj < rstart || *jj >= rend) olen++; 3175 else dlen++; 3176 jj++; 3177 } 3178 olens[i] = olen; 3179 dlens[i] = dlen; 3180 } 3181 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3182 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3183 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3184 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3185 ierr = PetscFree(dlens);CHKERRQ(ierr); 3186 } else { 3187 PetscInt ml,nl; 3188 3189 M = *newmat; 3190 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3191 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3192 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3193 /* 3194 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3195 rather than the slower MatSetValues(). 3196 */ 3197 M->was_assembled = PETSC_TRUE; 3198 M->assembled = PETSC_FALSE; 3199 } 3200 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3201 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3202 ii = aij->i; 3203 jj = aij->j; 3204 aa = aij->a; 3205 for (i=0; i<m; i++) { 3206 row = rstart + i; 3207 nz = ii[i+1] - ii[i]; 3208 cwork = jj; jj += nz; 3209 vwork = aa; aa += nz; 3210 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3211 } 3212 3213 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3214 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3215 *newmat = M; 3216 3217 /* save submatrix used in processor for next request */ 3218 if (call == MAT_INITIAL_MATRIX) { 3219 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3220 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3221 } 3222 3223 PetscFunctionReturn(0); 3224 } 3225 3226 EXTERN_C_BEGIN 3227 #undef __FUNCT__ 3228 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3229 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3230 { 3231 PetscInt m,cstart, cend,j,nnz,i,d; 3232 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3233 const PetscInt *JJ; 3234 PetscScalar *values; 3235 PetscErrorCode ierr; 3236 3237 PetscFunctionBegin; 3238 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3239 3240 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3241 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3242 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3243 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3244 m = B->rmap->n; 3245 cstart = B->cmap->rstart; 3246 cend = B->cmap->rend; 3247 rstart = B->rmap->rstart; 3248 3249 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3250 3251 #if defined(PETSC_USE_DEBUGGING) 3252 for (i=0; i<m; i++) { 3253 nnz = Ii[i+1]- Ii[i]; 3254 JJ = J + Ii[i]; 3255 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3256 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3257 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3258 } 3259 #endif 3260 3261 for (i=0; i<m; i++) { 3262 nnz = Ii[i+1]- Ii[i]; 3263 JJ = J + Ii[i]; 3264 nnz_max = PetscMax(nnz_max,nnz); 3265 d = 0; 3266 for (j=0; j<nnz; j++) { 3267 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3268 } 3269 d_nnz[i] = d; 3270 o_nnz[i] = nnz - d; 3271 } 3272 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3273 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3274 3275 if (v) values = (PetscScalar*)v; 3276 else { 3277 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3278 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3279 } 3280 3281 for (i=0; i<m; i++) { 3282 ii = i + rstart; 3283 nnz = Ii[i+1]- Ii[i]; 3284 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3285 } 3286 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3287 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3288 3289 if (!v) { 3290 ierr = PetscFree(values);CHKERRQ(ierr); 3291 } 3292 PetscFunctionReturn(0); 3293 } 3294 EXTERN_C_END 3295 3296 #undef __FUNCT__ 3297 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3298 /*@ 3299 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3300 (the default parallel PETSc format). 3301 3302 Collective on MPI_Comm 3303 3304 Input Parameters: 3305 + B - the matrix 3306 . i - the indices into j for the start of each local row (starts with zero) 3307 . j - the column indices for each local row (starts with zero) 3308 - v - optional values in the matrix 3309 3310 Level: developer 3311 3312 Notes: 3313 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3314 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3315 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3316 3317 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3318 3319 The format which is used for the sparse matrix input, is equivalent to a 3320 row-major ordering.. i.e for the following matrix, the input data expected is 3321 as shown: 3322 3323 1 0 0 3324 2 0 3 P0 3325 ------- 3326 4 5 6 P1 3327 3328 Process0 [P0]: rows_owned=[0,1] 3329 i = {0,1,3} [size = nrow+1 = 2+1] 3330 j = {0,0,2} [size = nz = 6] 3331 v = {1,2,3} [size = nz = 6] 3332 3333 Process1 [P1]: rows_owned=[2] 3334 i = {0,3} [size = nrow+1 = 1+1] 3335 j = {0,1,2} [size = nz = 6] 3336 v = {4,5,6} [size = nz = 6] 3337 3338 .keywords: matrix, aij, compressed row, sparse, parallel 3339 3340 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3341 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3342 @*/ 3343 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3344 { 3345 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3346 3347 PetscFunctionBegin; 3348 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3349 if (f) { 3350 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3351 } 3352 PetscFunctionReturn(0); 3353 } 3354 3355 #undef __FUNCT__ 3356 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3357 /*@C 3358 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3359 (the default parallel PETSc format). For good matrix assembly performance 3360 the user should preallocate the matrix storage by setting the parameters 3361 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3362 performance can be increased by more than a factor of 50. 3363 3364 Collective on MPI_Comm 3365 3366 Input Parameters: 3367 + A - the matrix 3368 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3369 (same value is used for all local rows) 3370 . d_nnz - array containing the number of nonzeros in the various rows of the 3371 DIAGONAL portion of the local submatrix (possibly different for each row) 3372 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3373 The size of this array is equal to the number of local rows, i.e 'm'. 3374 You must leave room for the diagonal entry even if it is zero. 3375 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3376 submatrix (same value is used for all local rows). 3377 - o_nnz - array containing the number of nonzeros in the various rows of the 3378 OFF-DIAGONAL portion of the local submatrix (possibly different for 3379 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3380 structure. The size of this array is equal to the number 3381 of local rows, i.e 'm'. 3382 3383 If the *_nnz parameter is given then the *_nz parameter is ignored 3384 3385 The AIJ format (also called the Yale sparse matrix format or 3386 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3387 storage. The stored row and column indices begin with zero. See the users manual for details. 3388 3389 The parallel matrix is partitioned such that the first m0 rows belong to 3390 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3391 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3392 3393 The DIAGONAL portion of the local submatrix of a processor can be defined 3394 as the submatrix which is obtained by extraction the part corresponding 3395 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3396 first row that belongs to the processor, and r2 is the last row belonging 3397 to the this processor. This is a square mxm matrix. The remaining portion 3398 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3399 3400 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3401 3402 You can call MatGetInfo() to get information on how effective the preallocation was; 3403 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3404 You can also run with the option -info and look for messages with the string 3405 malloc in them to see if additional memory allocation was needed. 3406 3407 Example usage: 3408 3409 Consider the following 8x8 matrix with 34 non-zero values, that is 3410 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3411 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3412 as follows: 3413 3414 .vb 3415 1 2 0 | 0 3 0 | 0 4 3416 Proc0 0 5 6 | 7 0 0 | 8 0 3417 9 0 10 | 11 0 0 | 12 0 3418 ------------------------------------- 3419 13 0 14 | 15 16 17 | 0 0 3420 Proc1 0 18 0 | 19 20 21 | 0 0 3421 0 0 0 | 22 23 0 | 24 0 3422 ------------------------------------- 3423 Proc2 25 26 27 | 0 0 28 | 29 0 3424 30 0 0 | 31 32 33 | 0 34 3425 .ve 3426 3427 This can be represented as a collection of submatrices as: 3428 3429 .vb 3430 A B C 3431 D E F 3432 G H I 3433 .ve 3434 3435 Where the submatrices A,B,C are owned by proc0, D,E,F are 3436 owned by proc1, G,H,I are owned by proc2. 3437 3438 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3439 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3440 The 'M','N' parameters are 8,8, and have the same values on all procs. 3441 3442 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3443 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3444 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3445 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3446 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3447 matrix, ans [DF] as another SeqAIJ matrix. 3448 3449 When d_nz, o_nz parameters are specified, d_nz storage elements are 3450 allocated for every row of the local diagonal submatrix, and o_nz 3451 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3452 One way to choose d_nz and o_nz is to use the max nonzerors per local 3453 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3454 In this case, the values of d_nz,o_nz are: 3455 .vb 3456 proc0 : dnz = 2, o_nz = 2 3457 proc1 : dnz = 3, o_nz = 2 3458 proc2 : dnz = 1, o_nz = 4 3459 .ve 3460 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3461 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3462 for proc3. i.e we are using 12+15+10=37 storage locations to store 3463 34 values. 3464 3465 When d_nnz, o_nnz parameters are specified, the storage is specified 3466 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3467 In the above case the values for d_nnz,o_nnz are: 3468 .vb 3469 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3470 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3471 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3472 .ve 3473 Here the space allocated is sum of all the above values i.e 34, and 3474 hence pre-allocation is perfect. 3475 3476 Level: intermediate 3477 3478 .keywords: matrix, aij, compressed row, sparse, parallel 3479 3480 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3481 MPIAIJ, MatGetInfo() 3482 @*/ 3483 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3484 { 3485 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3486 3487 PetscFunctionBegin; 3488 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3489 if (f) { 3490 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3491 } 3492 PetscFunctionReturn(0); 3493 } 3494 3495 #undef __FUNCT__ 3496 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3497 /*@ 3498 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3499 CSR format the local rows. 3500 3501 Collective on MPI_Comm 3502 3503 Input Parameters: 3504 + comm - MPI communicator 3505 . m - number of local rows (Cannot be PETSC_DECIDE) 3506 . n - This value should be the same as the local size used in creating the 3507 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3508 calculated if N is given) For square matrices n is almost always m. 3509 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3510 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3511 . i - row indices 3512 . j - column indices 3513 - a - matrix values 3514 3515 Output Parameter: 3516 . mat - the matrix 3517 3518 Level: intermediate 3519 3520 Notes: 3521 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3522 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3523 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3524 3525 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3526 3527 The format which is used for the sparse matrix input, is equivalent to a 3528 row-major ordering.. i.e for the following matrix, the input data expected is 3529 as shown: 3530 3531 1 0 0 3532 2 0 3 P0 3533 ------- 3534 4 5 6 P1 3535 3536 Process0 [P0]: rows_owned=[0,1] 3537 i = {0,1,3} [size = nrow+1 = 2+1] 3538 j = {0,0,2} [size = nz = 6] 3539 v = {1,2,3} [size = nz = 6] 3540 3541 Process1 [P1]: rows_owned=[2] 3542 i = {0,3} [size = nrow+1 = 1+1] 3543 j = {0,1,2} [size = nz = 6] 3544 v = {4,5,6} [size = nz = 6] 3545 3546 .keywords: matrix, aij, compressed row, sparse, parallel 3547 3548 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3549 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3550 @*/ 3551 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3552 { 3553 PetscErrorCode ierr; 3554 3555 PetscFunctionBegin; 3556 if (i[0]) { 3557 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3558 } 3559 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3560 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3561 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3562 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3563 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3564 PetscFunctionReturn(0); 3565 } 3566 3567 #undef __FUNCT__ 3568 #define __FUNCT__ "MatCreateMPIAIJ" 3569 /*@C 3570 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3571 (the default parallel PETSc format). For good matrix assembly performance 3572 the user should preallocate the matrix storage by setting the parameters 3573 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3574 performance can be increased by more than a factor of 50. 3575 3576 Collective on MPI_Comm 3577 3578 Input Parameters: 3579 + comm - MPI communicator 3580 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3581 This value should be the same as the local size used in creating the 3582 y vector for the matrix-vector product y = Ax. 3583 . n - This value should be the same as the local size used in creating the 3584 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3585 calculated if N is given) For square matrices n is almost always m. 3586 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3587 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3588 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3589 (same value is used for all local rows) 3590 . d_nnz - array containing the number of nonzeros in the various rows of the 3591 DIAGONAL portion of the local submatrix (possibly different for each row) 3592 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3593 The size of this array is equal to the number of local rows, i.e 'm'. 3594 You must leave room for the diagonal entry even if it is zero. 3595 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3596 submatrix (same value is used for all local rows). 3597 - o_nnz - array containing the number of nonzeros in the various rows of the 3598 OFF-DIAGONAL portion of the local submatrix (possibly different for 3599 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3600 structure. The size of this array is equal to the number 3601 of local rows, i.e 'm'. 3602 3603 Output Parameter: 3604 . A - the matrix 3605 3606 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3607 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3608 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3609 3610 Notes: 3611 If the *_nnz parameter is given then the *_nz parameter is ignored 3612 3613 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3614 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3615 storage requirements for this matrix. 3616 3617 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3618 processor than it must be used on all processors that share the object for 3619 that argument. 3620 3621 The user MUST specify either the local or global matrix dimensions 3622 (possibly both). 3623 3624 The parallel matrix is partitioned across processors such that the 3625 first m0 rows belong to process 0, the next m1 rows belong to 3626 process 1, the next m2 rows belong to process 2 etc.. where 3627 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3628 values corresponding to [m x N] submatrix. 3629 3630 The columns are logically partitioned with the n0 columns belonging 3631 to 0th partition, the next n1 columns belonging to the next 3632 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3633 3634 The DIAGONAL portion of the local submatrix on any given processor 3635 is the submatrix corresponding to the rows and columns m,n 3636 corresponding to the given processor. i.e diagonal matrix on 3637 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3638 etc. The remaining portion of the local submatrix [m x (N-n)] 3639 constitute the OFF-DIAGONAL portion. The example below better 3640 illustrates this concept. 3641 3642 For a square global matrix we define each processor's diagonal portion 3643 to be its local rows and the corresponding columns (a square submatrix); 3644 each processor's off-diagonal portion encompasses the remainder of the 3645 local matrix (a rectangular submatrix). 3646 3647 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3648 3649 When calling this routine with a single process communicator, a matrix of 3650 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3651 type of communicator, use the construction mechanism: 3652 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3653 3654 By default, this format uses inodes (identical nodes) when possible. 3655 We search for consecutive rows with the same nonzero structure, thereby 3656 reusing matrix information to achieve increased efficiency. 3657 3658 Options Database Keys: 3659 + -mat_no_inode - Do not use inodes 3660 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3661 - -mat_aij_oneindex - Internally use indexing starting at 1 3662 rather than 0. Note that when calling MatSetValues(), 3663 the user still MUST index entries starting at 0! 3664 3665 3666 Example usage: 3667 3668 Consider the following 8x8 matrix with 34 non-zero values, that is 3669 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3670 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3671 as follows: 3672 3673 .vb 3674 1 2 0 | 0 3 0 | 0 4 3675 Proc0 0 5 6 | 7 0 0 | 8 0 3676 9 0 10 | 11 0 0 | 12 0 3677 ------------------------------------- 3678 13 0 14 | 15 16 17 | 0 0 3679 Proc1 0 18 0 | 19 20 21 | 0 0 3680 0 0 0 | 22 23 0 | 24 0 3681 ------------------------------------- 3682 Proc2 25 26 27 | 0 0 28 | 29 0 3683 30 0 0 | 31 32 33 | 0 34 3684 .ve 3685 3686 This can be represented as a collection of submatrices as: 3687 3688 .vb 3689 A B C 3690 D E F 3691 G H I 3692 .ve 3693 3694 Where the submatrices A,B,C are owned by proc0, D,E,F are 3695 owned by proc1, G,H,I are owned by proc2. 3696 3697 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3698 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3699 The 'M','N' parameters are 8,8, and have the same values on all procs. 3700 3701 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3702 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3703 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3704 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3705 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3706 matrix, ans [DF] as another SeqAIJ matrix. 3707 3708 When d_nz, o_nz parameters are specified, d_nz storage elements are 3709 allocated for every row of the local diagonal submatrix, and o_nz 3710 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3711 One way to choose d_nz and o_nz is to use the max nonzerors per local 3712 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3713 In this case, the values of d_nz,o_nz are: 3714 .vb 3715 proc0 : dnz = 2, o_nz = 2 3716 proc1 : dnz = 3, o_nz = 2 3717 proc2 : dnz = 1, o_nz = 4 3718 .ve 3719 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3720 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3721 for proc3. i.e we are using 12+15+10=37 storage locations to store 3722 34 values. 3723 3724 When d_nnz, o_nnz parameters are specified, the storage is specified 3725 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3726 In the above case the values for d_nnz,o_nnz are: 3727 .vb 3728 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3729 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3730 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3731 .ve 3732 Here the space allocated is sum of all the above values i.e 34, and 3733 hence pre-allocation is perfect. 3734 3735 Level: intermediate 3736 3737 .keywords: matrix, aij, compressed row, sparse, parallel 3738 3739 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3740 MPIAIJ, MatCreateMPIAIJWithArrays() 3741 @*/ 3742 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3743 { 3744 PetscErrorCode ierr; 3745 PetscMPIInt size; 3746 3747 PetscFunctionBegin; 3748 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3749 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3750 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3751 if (size > 1) { 3752 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3753 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3754 } else { 3755 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3756 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3757 } 3758 PetscFunctionReturn(0); 3759 } 3760 3761 #undef __FUNCT__ 3762 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3763 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3764 { 3765 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3766 3767 PetscFunctionBegin; 3768 *Ad = a->A; 3769 *Ao = a->B; 3770 *colmap = a->garray; 3771 PetscFunctionReturn(0); 3772 } 3773 3774 #undef __FUNCT__ 3775 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3776 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3777 { 3778 PetscErrorCode ierr; 3779 PetscInt i; 3780 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3781 3782 PetscFunctionBegin; 3783 if (coloring->ctype == IS_COLORING_GLOBAL) { 3784 ISColoringValue *allcolors,*colors; 3785 ISColoring ocoloring; 3786 3787 /* set coloring for diagonal portion */ 3788 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3789 3790 /* set coloring for off-diagonal portion */ 3791 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3792 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3793 for (i=0; i<a->B->cmap->n; i++) { 3794 colors[i] = allcolors[a->garray[i]]; 3795 } 3796 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3797 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3798 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3799 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3800 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3801 ISColoringValue *colors; 3802 PetscInt *larray; 3803 ISColoring ocoloring; 3804 3805 /* set coloring for diagonal portion */ 3806 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3807 for (i=0; i<a->A->cmap->n; i++) { 3808 larray[i] = i + A->cmap->rstart; 3809 } 3810 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3811 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3812 for (i=0; i<a->A->cmap->n; i++) { 3813 colors[i] = coloring->colors[larray[i]]; 3814 } 3815 ierr = PetscFree(larray);CHKERRQ(ierr); 3816 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3817 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3818 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3819 3820 /* set coloring for off-diagonal portion */ 3821 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3822 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3823 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3824 for (i=0; i<a->B->cmap->n; i++) { 3825 colors[i] = coloring->colors[larray[i]]; 3826 } 3827 ierr = PetscFree(larray);CHKERRQ(ierr); 3828 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3829 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3830 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3831 } else { 3832 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3833 } 3834 3835 PetscFunctionReturn(0); 3836 } 3837 3838 #if defined(PETSC_HAVE_ADIC) 3839 #undef __FUNCT__ 3840 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3841 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3842 { 3843 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3844 PetscErrorCode ierr; 3845 3846 PetscFunctionBegin; 3847 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3848 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3849 PetscFunctionReturn(0); 3850 } 3851 #endif 3852 3853 #undef __FUNCT__ 3854 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3855 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3856 { 3857 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3858 PetscErrorCode ierr; 3859 3860 PetscFunctionBegin; 3861 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3862 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3863 PetscFunctionReturn(0); 3864 } 3865 3866 #undef __FUNCT__ 3867 #define __FUNCT__ "MatMerge" 3868 /*@ 3869 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3870 matrices from each processor 3871 3872 Collective on MPI_Comm 3873 3874 Input Parameters: 3875 + comm - the communicators the parallel matrix will live on 3876 . inmat - the input sequential matrices 3877 . n - number of local columns (or PETSC_DECIDE) 3878 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3879 3880 Output Parameter: 3881 . outmat - the parallel matrix generated 3882 3883 Level: advanced 3884 3885 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3886 3887 @*/ 3888 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3889 { 3890 PetscErrorCode ierr; 3891 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3892 PetscInt *indx; 3893 PetscScalar *values; 3894 3895 PetscFunctionBegin; 3896 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3897 if (scall == MAT_INITIAL_MATRIX){ 3898 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3899 if (n == PETSC_DECIDE){ 3900 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3901 } 3902 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3903 rstart -= m; 3904 3905 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3906 for (i=0;i<m;i++) { 3907 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3908 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3909 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3910 } 3911 /* This routine will ONLY return MPIAIJ type matrix */ 3912 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3913 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3914 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3915 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3916 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3917 3918 } else if (scall == MAT_REUSE_MATRIX){ 3919 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3920 } else { 3921 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3922 } 3923 3924 for (i=0;i<m;i++) { 3925 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3926 Ii = i + rstart; 3927 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3928 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3929 } 3930 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3931 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3932 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3933 3934 PetscFunctionReturn(0); 3935 } 3936 3937 #undef __FUNCT__ 3938 #define __FUNCT__ "MatFileSplit" 3939 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3940 { 3941 PetscErrorCode ierr; 3942 PetscMPIInt rank; 3943 PetscInt m,N,i,rstart,nnz; 3944 size_t len; 3945 const PetscInt *indx; 3946 PetscViewer out; 3947 char *name; 3948 Mat B; 3949 const PetscScalar *values; 3950 3951 PetscFunctionBegin; 3952 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3953 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3954 /* Should this be the type of the diagonal block of A? */ 3955 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3956 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3957 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3958 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3959 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3960 for (i=0;i<m;i++) { 3961 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3962 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3963 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3964 } 3965 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3966 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3967 3968 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3969 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3970 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3971 sprintf(name,"%s.%d",outfile,rank); 3972 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3973 ierr = PetscFree(name); 3974 ierr = MatView(B,out);CHKERRQ(ierr); 3975 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3976 ierr = MatDestroy(B);CHKERRQ(ierr); 3977 PetscFunctionReturn(0); 3978 } 3979 3980 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3981 #undef __FUNCT__ 3982 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3983 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3984 { 3985 PetscErrorCode ierr; 3986 Mat_Merge_SeqsToMPI *merge; 3987 PetscContainer container; 3988 3989 PetscFunctionBegin; 3990 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3991 if (container) { 3992 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3993 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3994 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3995 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3996 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3997 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3998 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3999 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4000 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4001 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4002 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4003 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4004 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4005 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4006 4007 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4008 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4009 } 4010 ierr = PetscFree(merge);CHKERRQ(ierr); 4011 4012 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4013 PetscFunctionReturn(0); 4014 } 4015 4016 #include "../src/mat/utils/freespace.h" 4017 #include "petscbt.h" 4018 4019 #undef __FUNCT__ 4020 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4021 /*@C 4022 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4023 matrices from each processor 4024 4025 Collective on MPI_Comm 4026 4027 Input Parameters: 4028 + comm - the communicators the parallel matrix will live on 4029 . seqmat - the input sequential matrices 4030 . m - number of local rows (or PETSC_DECIDE) 4031 . n - number of local columns (or PETSC_DECIDE) 4032 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4033 4034 Output Parameter: 4035 . mpimat - the parallel matrix generated 4036 4037 Level: advanced 4038 4039 Notes: 4040 The dimensions of the sequential matrix in each processor MUST be the same. 4041 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4042 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4043 @*/ 4044 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4045 { 4046 PetscErrorCode ierr; 4047 MPI_Comm comm=((PetscObject)mpimat)->comm; 4048 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4049 PetscMPIInt size,rank,taga,*len_s; 4050 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4051 PetscInt proc,m; 4052 PetscInt **buf_ri,**buf_rj; 4053 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4054 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4055 MPI_Request *s_waits,*r_waits; 4056 MPI_Status *status; 4057 MatScalar *aa=a->a; 4058 MatScalar **abuf_r,*ba_i; 4059 Mat_Merge_SeqsToMPI *merge; 4060 PetscContainer container; 4061 4062 PetscFunctionBegin; 4063 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4064 4065 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4066 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4067 4068 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4069 if (container) { 4070 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4071 } 4072 bi = merge->bi; 4073 bj = merge->bj; 4074 buf_ri = merge->buf_ri; 4075 buf_rj = merge->buf_rj; 4076 4077 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4078 owners = merge->rowmap->range; 4079 len_s = merge->len_s; 4080 4081 /* send and recv matrix values */ 4082 /*-----------------------------*/ 4083 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4084 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4085 4086 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4087 for (proc=0,k=0; proc<size; proc++){ 4088 if (!len_s[proc]) continue; 4089 i = owners[proc]; 4090 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4091 k++; 4092 } 4093 4094 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4095 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4096 ierr = PetscFree(status);CHKERRQ(ierr); 4097 4098 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4099 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4100 4101 /* insert mat values of mpimat */ 4102 /*----------------------------*/ 4103 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4104 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4105 4106 for (k=0; k<merge->nrecv; k++){ 4107 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4108 nrows = *(buf_ri_k[k]); 4109 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4110 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4111 } 4112 4113 /* set values of ba */ 4114 m = merge->rowmap->n; 4115 for (i=0; i<m; i++) { 4116 arow = owners[rank] + i; 4117 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4118 bnzi = bi[i+1] - bi[i]; 4119 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4120 4121 /* add local non-zero vals of this proc's seqmat into ba */ 4122 anzi = ai[arow+1] - ai[arow]; 4123 aj = a->j + ai[arow]; 4124 aa = a->a + ai[arow]; 4125 nextaj = 0; 4126 for (j=0; nextaj<anzi; j++){ 4127 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4128 ba_i[j] += aa[nextaj++]; 4129 } 4130 } 4131 4132 /* add received vals into ba */ 4133 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4134 /* i-th row */ 4135 if (i == *nextrow[k]) { 4136 anzi = *(nextai[k]+1) - *nextai[k]; 4137 aj = buf_rj[k] + *(nextai[k]); 4138 aa = abuf_r[k] + *(nextai[k]); 4139 nextaj = 0; 4140 for (j=0; nextaj<anzi; j++){ 4141 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4142 ba_i[j] += aa[nextaj++]; 4143 } 4144 } 4145 nextrow[k]++; nextai[k]++; 4146 } 4147 } 4148 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4149 } 4150 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4151 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4152 4153 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4154 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4155 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4156 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4157 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4158 PetscFunctionReturn(0); 4159 } 4160 4161 #undef __FUNCT__ 4162 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4163 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4164 { 4165 PetscErrorCode ierr; 4166 Mat B_mpi; 4167 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4168 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4169 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4170 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4171 PetscInt len,proc,*dnz,*onz; 4172 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4173 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4174 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4175 MPI_Status *status; 4176 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4177 PetscBT lnkbt; 4178 Mat_Merge_SeqsToMPI *merge; 4179 PetscContainer container; 4180 4181 PetscFunctionBegin; 4182 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4183 4184 /* make sure it is a PETSc comm */ 4185 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4186 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4187 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4188 4189 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4190 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4191 4192 /* determine row ownership */ 4193 /*---------------------------------------------------------*/ 4194 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4195 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4196 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4197 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4198 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4199 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4200 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4201 4202 m = merge->rowmap->n; 4203 M = merge->rowmap->N; 4204 owners = merge->rowmap->range; 4205 4206 /* determine the number of messages to send, their lengths */ 4207 /*---------------------------------------------------------*/ 4208 len_s = merge->len_s; 4209 4210 len = 0; /* length of buf_si[] */ 4211 merge->nsend = 0; 4212 for (proc=0; proc<size; proc++){ 4213 len_si[proc] = 0; 4214 if (proc == rank){ 4215 len_s[proc] = 0; 4216 } else { 4217 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4218 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4219 } 4220 if (len_s[proc]) { 4221 merge->nsend++; 4222 nrows = 0; 4223 for (i=owners[proc]; i<owners[proc+1]; i++){ 4224 if (ai[i+1] > ai[i]) nrows++; 4225 } 4226 len_si[proc] = 2*(nrows+1); 4227 len += len_si[proc]; 4228 } 4229 } 4230 4231 /* determine the number and length of messages to receive for ij-structure */ 4232 /*-------------------------------------------------------------------------*/ 4233 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4234 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4235 4236 /* post the Irecv of j-structure */ 4237 /*-------------------------------*/ 4238 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4239 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4240 4241 /* post the Isend of j-structure */ 4242 /*--------------------------------*/ 4243 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4244 4245 for (proc=0, k=0; proc<size; proc++){ 4246 if (!len_s[proc]) continue; 4247 i = owners[proc]; 4248 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4249 k++; 4250 } 4251 4252 /* receives and sends of j-structure are complete */ 4253 /*------------------------------------------------*/ 4254 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4255 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4256 4257 /* send and recv i-structure */ 4258 /*---------------------------*/ 4259 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4260 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4261 4262 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4263 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4264 for (proc=0,k=0; proc<size; proc++){ 4265 if (!len_s[proc]) continue; 4266 /* form outgoing message for i-structure: 4267 buf_si[0]: nrows to be sent 4268 [1:nrows]: row index (global) 4269 [nrows+1:2*nrows+1]: i-structure index 4270 */ 4271 /*-------------------------------------------*/ 4272 nrows = len_si[proc]/2 - 1; 4273 buf_si_i = buf_si + nrows+1; 4274 buf_si[0] = nrows; 4275 buf_si_i[0] = 0; 4276 nrows = 0; 4277 for (i=owners[proc]; i<owners[proc+1]; i++){ 4278 anzi = ai[i+1] - ai[i]; 4279 if (anzi) { 4280 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4281 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4282 nrows++; 4283 } 4284 } 4285 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4286 k++; 4287 buf_si += len_si[proc]; 4288 } 4289 4290 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4291 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4292 4293 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4294 for (i=0; i<merge->nrecv; i++){ 4295 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4296 } 4297 4298 ierr = PetscFree(len_si);CHKERRQ(ierr); 4299 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4300 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4301 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4302 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4303 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4304 ierr = PetscFree(status);CHKERRQ(ierr); 4305 4306 /* compute a local seq matrix in each processor */ 4307 /*----------------------------------------------*/ 4308 /* allocate bi array and free space for accumulating nonzero column info */ 4309 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4310 bi[0] = 0; 4311 4312 /* create and initialize a linked list */ 4313 nlnk = N+1; 4314 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4315 4316 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4317 len = 0; 4318 len = ai[owners[rank+1]] - ai[owners[rank]]; 4319 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4320 current_space = free_space; 4321 4322 /* determine symbolic info for each local row */ 4323 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4324 4325 for (k=0; k<merge->nrecv; k++){ 4326 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4327 nrows = *buf_ri_k[k]; 4328 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4329 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4330 } 4331 4332 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4333 len = 0; 4334 for (i=0;i<m;i++) { 4335 bnzi = 0; 4336 /* add local non-zero cols of this proc's seqmat into lnk */ 4337 arow = owners[rank] + i; 4338 anzi = ai[arow+1] - ai[arow]; 4339 aj = a->j + ai[arow]; 4340 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4341 bnzi += nlnk; 4342 /* add received col data into lnk */ 4343 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4344 if (i == *nextrow[k]) { /* i-th row */ 4345 anzi = *(nextai[k]+1) - *nextai[k]; 4346 aj = buf_rj[k] + *nextai[k]; 4347 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4348 bnzi += nlnk; 4349 nextrow[k]++; nextai[k]++; 4350 } 4351 } 4352 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4353 4354 /* if free space is not available, make more free space */ 4355 if (current_space->local_remaining<bnzi) { 4356 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4357 nspacedouble++; 4358 } 4359 /* copy data into free space, then initialize lnk */ 4360 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4361 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4362 4363 current_space->array += bnzi; 4364 current_space->local_used += bnzi; 4365 current_space->local_remaining -= bnzi; 4366 4367 bi[i+1] = bi[i] + bnzi; 4368 } 4369 4370 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4371 4372 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4373 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4374 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4375 4376 /* create symbolic parallel matrix B_mpi */ 4377 /*---------------------------------------*/ 4378 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4379 if (n==PETSC_DECIDE) { 4380 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4381 } else { 4382 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4383 } 4384 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4385 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4386 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4387 4388 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4389 B_mpi->assembled = PETSC_FALSE; 4390 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4391 merge->bi = bi; 4392 merge->bj = bj; 4393 merge->buf_ri = buf_ri; 4394 merge->buf_rj = buf_rj; 4395 merge->coi = PETSC_NULL; 4396 merge->coj = PETSC_NULL; 4397 merge->owners_co = PETSC_NULL; 4398 4399 /* attach the supporting struct to B_mpi for reuse */ 4400 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4401 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4402 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4403 *mpimat = B_mpi; 4404 4405 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4406 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4407 PetscFunctionReturn(0); 4408 } 4409 4410 #undef __FUNCT__ 4411 #define __FUNCT__ "MatMerge_SeqsToMPI" 4412 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4413 { 4414 PetscErrorCode ierr; 4415 4416 PetscFunctionBegin; 4417 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4418 if (scall == MAT_INITIAL_MATRIX){ 4419 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4420 } 4421 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4422 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4423 PetscFunctionReturn(0); 4424 } 4425 4426 #undef __FUNCT__ 4427 #define __FUNCT__ "MatGetLocalMat" 4428 /*@ 4429 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4430 4431 Not Collective 4432 4433 Input Parameters: 4434 + A - the matrix 4435 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4436 4437 Output Parameter: 4438 . A_loc - the local sequential matrix generated 4439 4440 Level: developer 4441 4442 @*/ 4443 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4444 { 4445 PetscErrorCode ierr; 4446 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4447 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4448 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4449 MatScalar *aa=a->a,*ba=b->a,*cam; 4450 PetscScalar *ca; 4451 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4452 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4453 4454 PetscFunctionBegin; 4455 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4456 if (scall == MAT_INITIAL_MATRIX){ 4457 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4458 ci[0] = 0; 4459 for (i=0; i<am; i++){ 4460 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4461 } 4462 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4463 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4464 k = 0; 4465 for (i=0; i<am; i++) { 4466 ncols_o = bi[i+1] - bi[i]; 4467 ncols_d = ai[i+1] - ai[i]; 4468 /* off-diagonal portion of A */ 4469 for (jo=0; jo<ncols_o; jo++) { 4470 col = cmap[*bj]; 4471 if (col >= cstart) break; 4472 cj[k] = col; bj++; 4473 ca[k++] = *ba++; 4474 } 4475 /* diagonal portion of A */ 4476 for (j=0; j<ncols_d; j++) { 4477 cj[k] = cstart + *aj++; 4478 ca[k++] = *aa++; 4479 } 4480 /* off-diagonal portion of A */ 4481 for (j=jo; j<ncols_o; j++) { 4482 cj[k] = cmap[*bj++]; 4483 ca[k++] = *ba++; 4484 } 4485 } 4486 /* put together the new matrix */ 4487 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4488 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4489 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4490 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4491 mat->free_a = PETSC_TRUE; 4492 mat->free_ij = PETSC_TRUE; 4493 mat->nonew = 0; 4494 } else if (scall == MAT_REUSE_MATRIX){ 4495 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4496 ci = mat->i; cj = mat->j; cam = mat->a; 4497 for (i=0; i<am; i++) { 4498 /* off-diagonal portion of A */ 4499 ncols_o = bi[i+1] - bi[i]; 4500 for (jo=0; jo<ncols_o; jo++) { 4501 col = cmap[*bj]; 4502 if (col >= cstart) break; 4503 *cam++ = *ba++; bj++; 4504 } 4505 /* diagonal portion of A */ 4506 ncols_d = ai[i+1] - ai[i]; 4507 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4508 /* off-diagonal portion of A */ 4509 for (j=jo; j<ncols_o; j++) { 4510 *cam++ = *ba++; bj++; 4511 } 4512 } 4513 } else { 4514 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4515 } 4516 4517 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4518 PetscFunctionReturn(0); 4519 } 4520 4521 #undef __FUNCT__ 4522 #define __FUNCT__ "MatGetLocalMatCondensed" 4523 /*@C 4524 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4525 4526 Not Collective 4527 4528 Input Parameters: 4529 + A - the matrix 4530 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4531 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4532 4533 Output Parameter: 4534 . A_loc - the local sequential matrix generated 4535 4536 Level: developer 4537 4538 @*/ 4539 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4540 { 4541 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4542 PetscErrorCode ierr; 4543 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4544 IS isrowa,iscola; 4545 Mat *aloc; 4546 4547 PetscFunctionBegin; 4548 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4549 if (!row){ 4550 start = A->rmap->rstart; end = A->rmap->rend; 4551 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4552 } else { 4553 isrowa = *row; 4554 } 4555 if (!col){ 4556 start = A->cmap->rstart; 4557 cmap = a->garray; 4558 nzA = a->A->cmap->n; 4559 nzB = a->B->cmap->n; 4560 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4561 ncols = 0; 4562 for (i=0; i<nzB; i++) { 4563 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4564 else break; 4565 } 4566 imark = i; 4567 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4568 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4569 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4570 ierr = PetscFree(idx);CHKERRQ(ierr); 4571 } else { 4572 iscola = *col; 4573 } 4574 if (scall != MAT_INITIAL_MATRIX){ 4575 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4576 aloc[0] = *A_loc; 4577 } 4578 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4579 *A_loc = aloc[0]; 4580 ierr = PetscFree(aloc);CHKERRQ(ierr); 4581 if (!row){ 4582 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4583 } 4584 if (!col){ 4585 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4586 } 4587 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4588 PetscFunctionReturn(0); 4589 } 4590 4591 #undef __FUNCT__ 4592 #define __FUNCT__ "MatGetBrowsOfAcols" 4593 /*@C 4594 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4595 4596 Collective on Mat 4597 4598 Input Parameters: 4599 + A,B - the matrices in mpiaij format 4600 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4601 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4602 4603 Output Parameter: 4604 + rowb, colb - index sets of rows and columns of B to extract 4605 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4606 - B_seq - the sequential matrix generated 4607 4608 Level: developer 4609 4610 @*/ 4611 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4612 { 4613 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4614 PetscErrorCode ierr; 4615 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4616 IS isrowb,iscolb; 4617 Mat *bseq; 4618 4619 PetscFunctionBegin; 4620 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4621 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4622 } 4623 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4624 4625 if (scall == MAT_INITIAL_MATRIX){ 4626 start = A->cmap->rstart; 4627 cmap = a->garray; 4628 nzA = a->A->cmap->n; 4629 nzB = a->B->cmap->n; 4630 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4631 ncols = 0; 4632 for (i=0; i<nzB; i++) { /* row < local row index */ 4633 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4634 else break; 4635 } 4636 imark = i; 4637 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4638 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4639 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4640 ierr = PetscFree(idx);CHKERRQ(ierr); 4641 *brstart = imark; 4642 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4643 } else { 4644 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4645 isrowb = *rowb; iscolb = *colb; 4646 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4647 bseq[0] = *B_seq; 4648 } 4649 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4650 *B_seq = bseq[0]; 4651 ierr = PetscFree(bseq);CHKERRQ(ierr); 4652 if (!rowb){ 4653 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4654 } else { 4655 *rowb = isrowb; 4656 } 4657 if (!colb){ 4658 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4659 } else { 4660 *colb = iscolb; 4661 } 4662 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4663 PetscFunctionReturn(0); 4664 } 4665 4666 #undef __FUNCT__ 4667 #define __FUNCT__ "MatGetBrowsOfAoCols" 4668 /*@C 4669 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4670 of the OFF-DIAGONAL portion of local A 4671 4672 Collective on Mat 4673 4674 Input Parameters: 4675 + A,B - the matrices in mpiaij format 4676 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4677 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4678 . startsj_r - similar to startsj for receives 4679 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4680 4681 Output Parameter: 4682 + B_oth - the sequential matrix generated 4683 4684 Level: developer 4685 4686 @*/ 4687 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4688 { 4689 VecScatter_MPI_General *gen_to,*gen_from; 4690 PetscErrorCode ierr; 4691 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4692 Mat_SeqAIJ *b_oth; 4693 VecScatter ctx=a->Mvctx; 4694 MPI_Comm comm=((PetscObject)ctx)->comm; 4695 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4696 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4697 PetscScalar *rvalues,*svalues; 4698 MatScalar *b_otha,*bufa,*bufA; 4699 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4700 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4701 MPI_Status *sstatus,rstatus; 4702 PetscMPIInt jj; 4703 PetscInt *cols,sbs,rbs; 4704 PetscScalar *vals; 4705 4706 PetscFunctionBegin; 4707 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4708 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4709 } 4710 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4711 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4712 4713 gen_to = (VecScatter_MPI_General*)ctx->todata; 4714 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4715 rvalues = gen_from->values; /* holds the length of receiving row */ 4716 svalues = gen_to->values; /* holds the length of sending row */ 4717 nrecvs = gen_from->n; 4718 nsends = gen_to->n; 4719 4720 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4721 srow = gen_to->indices; /* local row index to be sent */ 4722 sstarts = gen_to->starts; 4723 sprocs = gen_to->procs; 4724 sstatus = gen_to->sstatus; 4725 sbs = gen_to->bs; 4726 rstarts = gen_from->starts; 4727 rprocs = gen_from->procs; 4728 rbs = gen_from->bs; 4729 4730 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4731 if (scall == MAT_INITIAL_MATRIX){ 4732 /* i-array */ 4733 /*---------*/ 4734 /* post receives */ 4735 for (i=0; i<nrecvs; i++){ 4736 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4737 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4738 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4739 } 4740 4741 /* pack the outgoing message */ 4742 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4743 sstartsj[0] = 0; rstartsj[0] = 0; 4744 len = 0; /* total length of j or a array to be sent */ 4745 k = 0; 4746 for (i=0; i<nsends; i++){ 4747 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4748 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4749 for (j=0; j<nrows; j++) { 4750 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4751 for (l=0; l<sbs; l++){ 4752 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4753 rowlen[j*sbs+l] = ncols; 4754 len += ncols; 4755 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4756 } 4757 k++; 4758 } 4759 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4760 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4761 } 4762 /* recvs and sends of i-array are completed */ 4763 i = nrecvs; 4764 while (i--) { 4765 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4766 } 4767 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4768 4769 /* allocate buffers for sending j and a arrays */ 4770 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4771 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4772 4773 /* create i-array of B_oth */ 4774 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4775 b_othi[0] = 0; 4776 len = 0; /* total length of j or a array to be received */ 4777 k = 0; 4778 for (i=0; i<nrecvs; i++){ 4779 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4780 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4781 for (j=0; j<nrows; j++) { 4782 b_othi[k+1] = b_othi[k] + rowlen[j]; 4783 len += rowlen[j]; k++; 4784 } 4785 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4786 } 4787 4788 /* allocate space for j and a arrrays of B_oth */ 4789 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4790 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4791 4792 /* j-array */ 4793 /*---------*/ 4794 /* post receives of j-array */ 4795 for (i=0; i<nrecvs; i++){ 4796 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4797 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4798 } 4799 4800 /* pack the outgoing message j-array */ 4801 k = 0; 4802 for (i=0; i<nsends; i++){ 4803 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4804 bufJ = bufj+sstartsj[i]; 4805 for (j=0; j<nrows; j++) { 4806 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4807 for (ll=0; ll<sbs; ll++){ 4808 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4809 for (l=0; l<ncols; l++){ 4810 *bufJ++ = cols[l]; 4811 } 4812 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4813 } 4814 } 4815 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4816 } 4817 4818 /* recvs and sends of j-array are completed */ 4819 i = nrecvs; 4820 while (i--) { 4821 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4822 } 4823 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4824 } else if (scall == MAT_REUSE_MATRIX){ 4825 sstartsj = *startsj; 4826 rstartsj = *startsj_r; 4827 bufa = *bufa_ptr; 4828 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4829 b_otha = b_oth->a; 4830 } else { 4831 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4832 } 4833 4834 /* a-array */ 4835 /*---------*/ 4836 /* post receives of a-array */ 4837 for (i=0; i<nrecvs; i++){ 4838 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4839 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4840 } 4841 4842 /* pack the outgoing message a-array */ 4843 k = 0; 4844 for (i=0; i<nsends; i++){ 4845 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4846 bufA = bufa+sstartsj[i]; 4847 for (j=0; j<nrows; j++) { 4848 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4849 for (ll=0; ll<sbs; ll++){ 4850 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4851 for (l=0; l<ncols; l++){ 4852 *bufA++ = vals[l]; 4853 } 4854 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4855 } 4856 } 4857 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4858 } 4859 /* recvs and sends of a-array are completed */ 4860 i = nrecvs; 4861 while (i--) { 4862 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4863 } 4864 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4865 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4866 4867 if (scall == MAT_INITIAL_MATRIX){ 4868 /* put together the new matrix */ 4869 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4870 4871 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4872 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4873 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4874 b_oth->free_a = PETSC_TRUE; 4875 b_oth->free_ij = PETSC_TRUE; 4876 b_oth->nonew = 0; 4877 4878 ierr = PetscFree(bufj);CHKERRQ(ierr); 4879 if (!startsj || !bufa_ptr){ 4880 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4881 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4882 } else { 4883 *startsj = sstartsj; 4884 *startsj_r = rstartsj; 4885 *bufa_ptr = bufa; 4886 } 4887 } 4888 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4889 PetscFunctionReturn(0); 4890 } 4891 4892 #undef __FUNCT__ 4893 #define __FUNCT__ "MatGetCommunicationStructs" 4894 /*@C 4895 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4896 4897 Not Collective 4898 4899 Input Parameters: 4900 . A - The matrix in mpiaij format 4901 4902 Output Parameter: 4903 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4904 . colmap - A map from global column index to local index into lvec 4905 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4906 4907 Level: developer 4908 4909 @*/ 4910 #if defined (PETSC_USE_CTABLE) 4911 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4912 #else 4913 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4914 #endif 4915 { 4916 Mat_MPIAIJ *a; 4917 4918 PetscFunctionBegin; 4919 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4920 PetscValidPointer(lvec, 2) 4921 PetscValidPointer(colmap, 3) 4922 PetscValidPointer(multScatter, 4) 4923 a = (Mat_MPIAIJ *) A->data; 4924 if (lvec) *lvec = a->lvec; 4925 if (colmap) *colmap = a->colmap; 4926 if (multScatter) *multScatter = a->Mvctx; 4927 PetscFunctionReturn(0); 4928 } 4929 4930 EXTERN_C_BEGIN 4931 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4932 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4933 EXTERN_C_END 4934 4935 #undef __FUNCT__ 4936 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4937 /* 4938 Computes (B'*A')' since computing B*A directly is untenable 4939 4940 n p p 4941 ( ) ( ) ( ) 4942 m ( A ) * n ( B ) = m ( C ) 4943 ( ) ( ) ( ) 4944 4945 */ 4946 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4947 { 4948 PetscErrorCode ierr; 4949 Mat At,Bt,Ct; 4950 4951 PetscFunctionBegin; 4952 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4953 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4954 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4955 ierr = MatDestroy(At);CHKERRQ(ierr); 4956 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4957 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4958 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4959 PetscFunctionReturn(0); 4960 } 4961 4962 #undef __FUNCT__ 4963 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4964 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4965 { 4966 PetscErrorCode ierr; 4967 PetscInt m=A->rmap->n,n=B->cmap->n; 4968 Mat Cmat; 4969 4970 PetscFunctionBegin; 4971 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4972 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4973 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4974 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4975 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4976 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4977 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4978 *C = Cmat; 4979 PetscFunctionReturn(0); 4980 } 4981 4982 /* ----------------------------------------------------------------*/ 4983 #undef __FUNCT__ 4984 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4985 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4986 { 4987 PetscErrorCode ierr; 4988 4989 PetscFunctionBegin; 4990 if (scall == MAT_INITIAL_MATRIX){ 4991 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4992 } 4993 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4994 PetscFunctionReturn(0); 4995 } 4996 4997 EXTERN_C_BEGIN 4998 #if defined(PETSC_HAVE_MUMPS) 4999 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 5000 #endif 5001 #if defined(PETSC_HAVE_PASTIX) 5002 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5003 #endif 5004 #if defined(PETSC_HAVE_SUPERLU_DIST) 5005 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5006 #endif 5007 #if defined(PETSC_HAVE_SPOOLES) 5008 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5009 #endif 5010 EXTERN_C_END 5011 5012 /*MC 5013 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5014 5015 Options Database Keys: 5016 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5017 5018 Level: beginner 5019 5020 .seealso: MatCreateMPIAIJ() 5021 M*/ 5022 5023 EXTERN_C_BEGIN 5024 #undef __FUNCT__ 5025 #define __FUNCT__ "MatCreate_MPIAIJ" 5026 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5027 { 5028 Mat_MPIAIJ *b; 5029 PetscErrorCode ierr; 5030 PetscMPIInt size; 5031 5032 PetscFunctionBegin; 5033 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5034 5035 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5036 B->data = (void*)b; 5037 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5038 B->rmap->bs = 1; 5039 B->assembled = PETSC_FALSE; 5040 B->mapping = 0; 5041 5042 B->insertmode = NOT_SET_VALUES; 5043 b->size = size; 5044 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5045 5046 /* build cache for off array entries formed */ 5047 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5048 b->donotstash = PETSC_FALSE; 5049 b->colmap = 0; 5050 b->garray = 0; 5051 b->roworiented = PETSC_TRUE; 5052 5053 /* stuff used for matrix vector multiply */ 5054 b->lvec = PETSC_NULL; 5055 b->Mvctx = PETSC_NULL; 5056 5057 /* stuff for MatGetRow() */ 5058 b->rowindices = 0; 5059 b->rowvalues = 0; 5060 b->getrowactive = PETSC_FALSE; 5061 5062 #if defined(PETSC_HAVE_SPOOLES) 5063 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5064 "MatGetFactor_mpiaij_spooles", 5065 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5066 #endif 5067 #if defined(PETSC_HAVE_MUMPS) 5068 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5069 "MatGetFactor_mpiaij_mumps", 5070 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5071 #endif 5072 #if defined(PETSC_HAVE_PASTIX) 5073 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5074 "MatGetFactor_mpiaij_pastix", 5075 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5076 #endif 5077 #if defined(PETSC_HAVE_SUPERLU_DIST) 5078 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5079 "MatGetFactor_mpiaij_superlu_dist", 5080 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5081 #endif 5082 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5083 "MatStoreValues_MPIAIJ", 5084 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5085 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5086 "MatRetrieveValues_MPIAIJ", 5087 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5088 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5089 "MatGetDiagonalBlock_MPIAIJ", 5090 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5091 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5092 "MatIsTranspose_MPIAIJ", 5093 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5094 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5095 "MatMPIAIJSetPreallocation_MPIAIJ", 5096 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5097 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5098 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5099 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5100 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5101 "MatDiagonalScaleLocal_MPIAIJ", 5102 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5103 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5104 "MatConvert_MPIAIJ_MPICSRPERM", 5105 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5106 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5107 "MatConvert_MPIAIJ_MPICRL", 5108 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5109 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5110 "MatConvert_MPIAIJ_MPISBAIJ", 5111 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5112 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5113 "MatMatMult_MPIDense_MPIAIJ", 5114 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5115 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5116 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5117 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5118 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5119 "MatMatMultNumeric_MPIDense_MPIAIJ", 5120 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5121 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5122 PetscFunctionReturn(0); 5123 } 5124 EXTERN_C_END 5125 5126 #undef __FUNCT__ 5127 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5128 /*@ 5129 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5130 and "off-diagonal" part of the matrix in CSR format. 5131 5132 Collective on MPI_Comm 5133 5134 Input Parameters: 5135 + comm - MPI communicator 5136 . m - number of local rows (Cannot be PETSC_DECIDE) 5137 . n - This value should be the same as the local size used in creating the 5138 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5139 calculated if N is given) For square matrices n is almost always m. 5140 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5141 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5142 . i - row indices for "diagonal" portion of matrix 5143 . j - column indices 5144 . a - matrix values 5145 . oi - row indices for "off-diagonal" portion of matrix 5146 . oj - column indices 5147 - oa - matrix values 5148 5149 Output Parameter: 5150 . mat - the matrix 5151 5152 Level: advanced 5153 5154 Notes: 5155 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5156 5157 The i and j indices are 0 based 5158 5159 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5160 5161 This sets local rows and cannot be used to set off-processor values. 5162 5163 You cannot later use MatSetValues() to change values in this matrix. 5164 5165 .keywords: matrix, aij, compressed row, sparse, parallel 5166 5167 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5168 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5169 @*/ 5170 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5171 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5172 { 5173 PetscErrorCode ierr; 5174 Mat_MPIAIJ *maij; 5175 5176 PetscFunctionBegin; 5177 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5178 if (i[0]) { 5179 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5180 } 5181 if (oi[0]) { 5182 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5183 } 5184 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5185 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5186 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5187 maij = (Mat_MPIAIJ*) (*mat)->data; 5188 maij->donotstash = PETSC_TRUE; 5189 (*mat)->preallocated = PETSC_TRUE; 5190 5191 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5192 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5193 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5194 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5195 5196 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5197 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5198 5199 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5200 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5201 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5202 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5203 5204 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5205 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5206 PetscFunctionReturn(0); 5207 } 5208 5209 /* 5210 Special version for direct calls from Fortran 5211 */ 5212 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5213 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5214 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5215 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5216 #endif 5217 5218 /* Change these macros so can be used in void function */ 5219 #undef CHKERRQ 5220 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5221 #undef SETERRQ2 5222 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5223 #undef SETERRQ 5224 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5225 5226 EXTERN_C_BEGIN 5227 #undef __FUNCT__ 5228 #define __FUNCT__ "matsetvaluesmpiaij_" 5229 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5230 { 5231 Mat mat = *mmat; 5232 PetscInt m = *mm, n = *mn; 5233 InsertMode addv = *maddv; 5234 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5235 PetscScalar value; 5236 PetscErrorCode ierr; 5237 5238 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5239 if (mat->insertmode == NOT_SET_VALUES) { 5240 mat->insertmode = addv; 5241 } 5242 #if defined(PETSC_USE_DEBUG) 5243 else if (mat->insertmode != addv) { 5244 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5245 } 5246 #endif 5247 { 5248 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5249 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5250 PetscTruth roworiented = aij->roworiented; 5251 5252 /* Some Variables required in the macro */ 5253 Mat A = aij->A; 5254 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5255 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5256 MatScalar *aa = a->a; 5257 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5258 Mat B = aij->B; 5259 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5260 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5261 MatScalar *ba = b->a; 5262 5263 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5264 PetscInt nonew = a->nonew; 5265 MatScalar *ap1,*ap2; 5266 5267 PetscFunctionBegin; 5268 for (i=0; i<m; i++) { 5269 if (im[i] < 0) continue; 5270 #if defined(PETSC_USE_DEBUG) 5271 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5272 #endif 5273 if (im[i] >= rstart && im[i] < rend) { 5274 row = im[i] - rstart; 5275 lastcol1 = -1; 5276 rp1 = aj + ai[row]; 5277 ap1 = aa + ai[row]; 5278 rmax1 = aimax[row]; 5279 nrow1 = ailen[row]; 5280 low1 = 0; 5281 high1 = nrow1; 5282 lastcol2 = -1; 5283 rp2 = bj + bi[row]; 5284 ap2 = ba + bi[row]; 5285 rmax2 = bimax[row]; 5286 nrow2 = bilen[row]; 5287 low2 = 0; 5288 high2 = nrow2; 5289 5290 for (j=0; j<n; j++) { 5291 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5292 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5293 if (in[j] >= cstart && in[j] < cend){ 5294 col = in[j] - cstart; 5295 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5296 } else if (in[j] < 0) continue; 5297 #if defined(PETSC_USE_DEBUG) 5298 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5299 #endif 5300 else { 5301 if (mat->was_assembled) { 5302 if (!aij->colmap) { 5303 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5304 } 5305 #if defined (PETSC_USE_CTABLE) 5306 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5307 col--; 5308 #else 5309 col = aij->colmap[in[j]] - 1; 5310 #endif 5311 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5312 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5313 col = in[j]; 5314 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5315 B = aij->B; 5316 b = (Mat_SeqAIJ*)B->data; 5317 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5318 rp2 = bj + bi[row]; 5319 ap2 = ba + bi[row]; 5320 rmax2 = bimax[row]; 5321 nrow2 = bilen[row]; 5322 low2 = 0; 5323 high2 = nrow2; 5324 bm = aij->B->rmap->n; 5325 ba = b->a; 5326 } 5327 } else col = in[j]; 5328 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5329 } 5330 } 5331 } else { 5332 if (!aij->donotstash) { 5333 if (roworiented) { 5334 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5335 } else { 5336 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5337 } 5338 } 5339 } 5340 }} 5341 PetscFunctionReturnVoid(); 5342 } 5343 EXTERN_C_END 5344 5345