#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

  Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
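      /* Editorial note (not in the original source): the counting below mirrors the
         rank-0 branch above, but runs over the column indices just received in gmataj
         rather than over gmat directly. */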
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

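/*
   Usage sketch (editorial, illustrative only; the variable names are hypothetical):
   a SeqAIJ matrix assembled on rank 0 can be spread across the communicator with

       Mat      gA;      // global SeqAIJ matrix, significant on rank 0 only
       Mat      dA;      // resulting distributed MPIAIJ matrix
       PetscInt mlocal;  // number of rows this rank should own
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gA,mlocal,MAT_INITIAL_MATRIX,&dA);CHKERRQ(ierr);

   A later call with MAT_REUSE_MATRIX and the same dA only re-sends the numerical values.
*/
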
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each processor
  has an order N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

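/*
   Editorial note (not in the original source): the two macros above implement the hot
   inner loop of MatSetValues for the diagonal (A) and off-diagonal (B) sequential blocks.
   They first narrow the search window with a coarse bisection (while high-low > 5), then
   scan linearly; an existing entry is overwritten or summed according to addv, otherwise
   the row is grown via MatSeqXAIJReallocateAIJ and the later entries are shifted up one
   slot to make room for the new (col,value) pair.
*/
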
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscTruth ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
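            /* Editorial note: translate the global column index into the local column
               numbering of the off-diagonal block B. colmap stores local+1 (array form)
               or global+1 -> local+1 (hash-table form), so col < 0 after the decrement
               below means the column was not preallocated. */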
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

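  /* Editorial note: mat->insertmode is combined across ranks with a bitwise OR below;
     since INSERT_VALUES and ADD_VALUES are distinct nonzero bit patterns, the result
     equals (ADD_VALUES|INSERT_VALUES) exactly when some ranks inserted while others
     added, which is the inconsistent state rejected just after the reduction. */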
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr); aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

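  /* Editorial note: PetscMaxSum() above gives each process the number of messages it
     will receive (nrecvs) and the maximum message length (nmax). Receives can therefore
     be posted with MPI_ANY_SOURCE into fixed-size slots of rvalues before the matching
     sends are issued; the actual senders and lengths are recovered from the MPI_Status. */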
  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */

  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

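/*
   Editorial note: MatMultTranspose_MPIAIJ below computes yy = A^T xx as
   yy = A_d^T x_local plus a reverse scatter-add of B^T x_local, where A_d and B are the
   local diagonal and off-diagonal blocks. When the scatter context supports merged
   begin/end operations the ordering of the local product and the reverse scatter is
   swapped, as the two branches show.
*/
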
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me, Notme;
  PetscErrorCode ierr;
  PetscInt       M, N, first, last, *notme, i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

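/*
   Editorial note: MatView_MPIAIJ_Binary below writes the PETSc binary matrix format:
   a header (MAT_FILE_CLASSID, global row count, global column count, total nonzeros),
   followed by the per-row nonzero counts, the column indices, and finally the values.
   Each block is funnelled through process 0, which receives the other ranks'
   contributions one rank at a time and appends them to the file.
*/
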
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs space for the largest number of nonzeros on any process */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
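  /* Editorial note: the values are packed in the same order as the column indices above:
     for each row, first the off-diagonal entries whose global column lies before the
     diagonal block, then the diagonal-block entries, then the remaining off-diagonal
     entries, so each row is written in increasing global column order. */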
  cnt = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject)mat)->prefix,"-mat_ascii_output_large",&flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscTruth     hasop;

  PetscFunctionBegin;
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
switch (op) { 1420 case MAT_NEW_NONZERO_LOCATIONS: 1421 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1422 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1423 case MAT_KEEP_NONZERO_PATTERN: 1424 case MAT_NEW_NONZERO_LOCATION_ERR: 1425 case MAT_USE_INODES: 1426 case MAT_IGNORE_ZERO_ENTRIES: 1427 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1428 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1429 break; 1430 case MAT_ROW_ORIENTED: 1431 a->roworiented = flg; 1432 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1433 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1434 break; 1435 case MAT_NEW_DIAGONALS: 1436 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1437 break; 1438 case MAT_IGNORE_OFF_PROC_ENTRIES: 1439 a->donotstash = PETSC_TRUE; 1440 break; 1441 case MAT_SPD: 1442 A->spd_set = PETSC_TRUE; 1443 A->spd = flg; 1444 if (flg) { 1445 A->symmetric = PETSC_TRUE; 1446 A->structurally_symmetric = PETSC_TRUE; 1447 A->symmetric_set = PETSC_TRUE; 1448 A->structurally_symmetric_set = PETSC_TRUE; 1449 } 1450 break; 1451 case MAT_SYMMETRIC: 1452 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1453 break; 1454 case MAT_STRUCTURALLY_SYMMETRIC: 1455 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1456 break; 1457 case MAT_HERMITIAN: 1458 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1459 break; 1460 case MAT_SYMMETRY_ETERNAL: 1461 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1462 break; 1463 default: 1464 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1465 } 1466 PetscFunctionReturn(0); 1467 } 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatGetRow_MPIAIJ" 1471 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1472 { 1473 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1474 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1475 PetscErrorCode ierr; 1476 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1477 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1478 PetscInt *cmap,*idx_p; 1479 1480 PetscFunctionBegin; 1481 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1482 mat->getrowactive = PETSC_TRUE; 1483 1484 if (!mat->rowvalues && (idx || v)) { 1485 /* 1486 allocate enough space to hold information from the longest row. 
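       (The work arrays mat->rowvalues and mat->rowindices are sized to the maximum, over the local rows,
        of the number of nonzeros in the diagonal part plus the off-diagonal part, and are reused by every
        later MatGetRow() call on this matrix.)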
1487 */ 1488 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1489 PetscInt max = 1,tmp; 1490 for (i=0; i<matin->rmap->n; i++) { 1491 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1492 if (max < tmp) { max = tmp; } 1493 } 1494 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1495 } 1496 1497 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1498 lrow = row - rstart; 1499 1500 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1501 if (!v) {pvA = 0; pvB = 0;} 1502 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1503 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1504 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1505 nztot = nzA + nzB; 1506 1507 cmap = mat->garray; 1508 if (v || idx) { 1509 if (nztot) { 1510 /* Sort by increasing column numbers, assuming A and B already sorted */ 1511 PetscInt imark = -1; 1512 if (v) { 1513 *v = v_p = mat->rowvalues; 1514 for (i=0; i<nzB; i++) { 1515 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1516 else break; 1517 } 1518 imark = i; 1519 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1520 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1521 } 1522 if (idx) { 1523 *idx = idx_p = mat->rowindices; 1524 if (imark > -1) { 1525 for (i=0; i<imark; i++) { 1526 idx_p[i] = cmap[cworkB[i]]; 1527 } 1528 } else { 1529 for (i=0; i<nzB; i++) { 1530 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1531 else break; 1532 } 1533 imark = i; 1534 } 1535 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1536 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1537 } 1538 } else { 1539 if (idx) *idx = 0; 1540 if (v) *v = 0; 1541 } 1542 } 1543 *nz = nztot; 1544 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1545 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1546 PetscFunctionReturn(0); 1547 } 1548 1549 #undef __FUNCT__ 1550 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1551 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1552 { 1553 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1554 1555 PetscFunctionBegin; 1556 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1557 aij->getrowactive = PETSC_FALSE; 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "MatNorm_MPIAIJ" 1563 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1564 { 1565 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1566 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1567 PetscErrorCode ierr; 1568 PetscInt i,j,cstart = mat->cmap->rstart; 1569 PetscReal sum = 0.0; 1570 MatScalar *v; 1571 1572 PetscFunctionBegin; 1573 if (aij->size == 1) { 1574 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1575 } else { 1576 if (type == NORM_FROBENIUS) { 1577 v = amat->a; 1578 for (i=0; i<amat->nz; i++) { 1579 #if defined(PETSC_USE_COMPLEX) 1580 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1581 #else 1582 sum += (*v)*(*v); v++; 1583 #endif 1584 } 1585 v = bmat->a; 1586 for (i=0; i<bmat->nz; i++) { 1587 #if defined(PETSC_USE_COMPLEX) 1588 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1589 #else 1590 sum += (*v)*(*v); v++; 1591 #endif 1592 } 1593 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1594 *norm = sqrt(*norm); 1595 } else if (type 
== NORM_1) { /* max column norm */ 1596 PetscReal *tmp,*tmp2; 1597 PetscInt *jj,*garray = aij->garray; 1598 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1599 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1600 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1601 *norm = 0.0; 1602 v = amat->a; jj = amat->j; 1603 for (j=0; j<amat->nz; j++) { 1604 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1605 } 1606 v = bmat->a; jj = bmat->j; 1607 for (j=0; j<bmat->nz; j++) { 1608 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1609 } 1610 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1611 for (j=0; j<mat->cmap->N; j++) { 1612 if (tmp2[j] > *norm) *norm = tmp2[j]; 1613 } 1614 ierr = PetscFree(tmp);CHKERRQ(ierr); 1615 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1616 } else if (type == NORM_INFINITY) { /* max row norm */ 1617 PetscReal ntemp = 0.0; 1618 for (j=0; j<aij->A->rmap->n; j++) { 1619 v = amat->a + amat->i[j]; 1620 sum = 0.0; 1621 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1622 sum += PetscAbsScalar(*v); v++; 1623 } 1624 v = bmat->a + bmat->i[j]; 1625 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1626 sum += PetscAbsScalar(*v); v++; 1627 } 1628 if (sum > ntemp) ntemp = sum; 1629 } 1630 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1631 } else { 1632 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1633 } 1634 } 1635 PetscFunctionReturn(0); 1636 } 1637 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "MatTranspose_MPIAIJ" 1640 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1641 { 1642 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1643 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1644 PetscErrorCode ierr; 1645 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1646 PetscInt cstart=A->cmap->rstart,ncol; 1647 Mat B; 1648 MatScalar *array; 1649 1650 PetscFunctionBegin; 1651 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1652 1653 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1654 ai = Aloc->i; aj = Aloc->j; 1655 bi = Bloc->i; bj = Bloc->j; 1656 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1657 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1658 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1659 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1660 for (i=0; i<ai[ma]; i++){ 1661 d_nnz[aj[i]] ++; 1662 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1663 } 1664 1665 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1666 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1667 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1668 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1669 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1670 } else { 1671 B = *matout; 1672 } 1673 1674 /* copy over the A part */ 1675 array = Aloc->a; 1676 row = A->rmap->rstart; 1677 for (i=0; i<ma; i++) { 1678 ncol = ai[i+1]-ai[i]; 1679 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1680 row++; array += ncol; aj += ncol; 1681 } 1682 aj = Aloc->j; 1683 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1684 1685 /* copy over the B part */ 1686 ierr = 
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1687 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1688 array = Bloc->a; 1689 row = A->rmap->rstart; 1690 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1691 cols_tmp = cols; 1692 for (i=0; i<mb; i++) { 1693 ncol = bi[i+1]-bi[i]; 1694 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1695 row++; array += ncol; cols_tmp += ncol; 1696 } 1697 ierr = PetscFree(cols);CHKERRQ(ierr); 1698 1699 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1701 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1702 *matout = B; 1703 } else { 1704 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1705 } 1706 PetscFunctionReturn(0); 1707 } 1708 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1711 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1712 { 1713 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1714 Mat a = aij->A,b = aij->B; 1715 PetscErrorCode ierr; 1716 PetscInt s1,s2,s3; 1717 1718 PetscFunctionBegin; 1719 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1720 if (rr) { 1721 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1722 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1723 /* Overlap communication with computation. */ 1724 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1725 } 1726 if (ll) { 1727 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1728 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1729 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1730 } 1731 /* scale the diagonal block */ 1732 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1733 1734 if (rr) { 1735 /* Do a scatter end and then right scale the off-diagonal block */ 1736 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1737 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1738 } 1739 1740 PetscFunctionReturn(0); 1741 } 1742 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1745 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1746 { 1747 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1748 PetscErrorCode ierr; 1749 1750 PetscFunctionBegin; 1751 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1752 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1753 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1754 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1755 PetscFunctionReturn(0); 1756 } 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1759 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1760 { 1761 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1762 PetscErrorCode ierr; 1763 1764 PetscFunctionBegin; 1765 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1766 PetscFunctionReturn(0); 1767 } 1768 1769 #undef __FUNCT__ 1770 #define __FUNCT__ "MatEqual_MPIAIJ" 1771 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1772 { 1773 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1774 Mat a,b,c,d; 1775 PetscTruth flg; 1776 PetscErrorCode ierr; 1777 1778 PetscFunctionBegin; 1779 a = matA->A; b = matA->B; 1780 c = matB->A; d = matB->B; 1781 1782 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1783 if (flg) { 1784 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1785 } 1786 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1787 
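  /* the result is PETSC_TRUE only when both the diagonal and the off-diagonal blocks compare equal on every process */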
PetscFunctionReturn(0); 1788 } 1789 1790 #undef __FUNCT__ 1791 #define __FUNCT__ "MatCopy_MPIAIJ" 1792 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1793 { 1794 PetscErrorCode ierr; 1795 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1796 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1797 1798 PetscFunctionBegin; 1799 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1800 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1801 /* because of the column compression in the off-processor part of the matrix a->B, 1802 the number of columns in a->B and b->B may be different, hence we cannot call 1803 the MatCopy() directly on the two parts. If need be, we can provide a more 1804 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1805 then copying the submatrices */ 1806 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1807 } else { 1808 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1809 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1810 } 1811 PetscFunctionReturn(0); 1812 } 1813 1814 #undef __FUNCT__ 1815 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1816 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1817 { 1818 PetscErrorCode ierr; 1819 1820 PetscFunctionBegin; 1821 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1822 PetscFunctionReturn(0); 1823 } 1824 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatAXPY_MPIAIJ" 1827 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1828 { 1829 PetscErrorCode ierr; 1830 PetscInt i; 1831 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1832 PetscBLASInt bnz,one=1; 1833 Mat_SeqAIJ *x,*y; 1834 1835 PetscFunctionBegin; 1836 if (str == SAME_NONZERO_PATTERN) { 1837 PetscScalar alpha = a; 1838 x = (Mat_SeqAIJ *)xx->A->data; 1839 y = (Mat_SeqAIJ *)yy->A->data; 1840 bnz = PetscBLASIntCast(x->nz); 1841 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1842 x = (Mat_SeqAIJ *)xx->B->data; 1843 y = (Mat_SeqAIJ *)yy->B->data; 1844 bnz = PetscBLASIntCast(x->nz); 1845 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1846 } else if (str == SUBSET_NONZERO_PATTERN) { 1847 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1848 1849 x = (Mat_SeqAIJ *)xx->B->data; 1850 y = (Mat_SeqAIJ *)yy->B->data; 1851 if (y->xtoy && y->XtoY != xx->B) { 1852 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1853 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1854 } 1855 if (!y->xtoy) { /* get xtoy */ 1856 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1857 y->XtoY = xx->B; 1858 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1859 } 1860 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1861 } else { 1862 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1863 } 1864 PetscFunctionReturn(0); 1865 } 1866 1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatConjugate_MPIAIJ" 1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1872 { 1873 #if defined(PETSC_USE_COMPLEX) 1874 PetscErrorCode ierr; 1875 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1876 1877 PetscFunctionBegin; 1878 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1879 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1880 #else 1881 PetscFunctionBegin; 1882 #endif 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatRealPart_MPIAIJ" 1888 PetscErrorCode 
MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscTruth     row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIAIJ" 1982 /* 1983 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1984 */ 1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1986 { 1987 namespace graph_dist = boost::graph::distributed; 1988 1989 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1990 lgraph_type* lgraph_p; 1991 PetscContainer c; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1996 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1997 ierr = VecCopy(b, x);CHKERRQ(ierr); 1998 1999 PetscScalar* array_x; 2000 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2001 PetscInt sx; 2002 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2003 2004 PetscScalar* array_b; 2005 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2006 PetscInt sb; 2007 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2008 2009 lgraph_type& level_graph = *lgraph_p; 2010 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2011 2012 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2013 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2014 ref_x(array_x, boost::extents[num_vertices(graph)]); 2015 2016 typedef boost::iterator_property_map<array_ref_type::iterator, 2017 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2018 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2019 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2020 2021 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2022 2023 PetscFunctionReturn(0); 2024 } 2025 #endif 2026 2027 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2028 PetscInt nzlocal,nsends,nrecvs; 2029 PetscMPIInt *send_rank,*recv_rank; 2030 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2031 PetscScalar *sbuf_a,**rbuf_a; 2032 PetscErrorCode (*MatDestroy)(Mat); 2033 } Mat_Redundant; 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2038 { 2039 PetscErrorCode ierr; 2040 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2041 PetscInt i; 2042 2043 PetscFunctionBegin; 2044 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2045 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2046 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2047 for (i=0; i<redund->nrecvs; i++){ 2048 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2049 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2050 } 2051 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2052 ierr = PetscFree(redund);CHKERRQ(ierr); 2053 PetscFunctionReturn(0); 2054 } 2055 2056 #undef __FUNCT__ 2057 #define __FUNCT__ "MatDestroy_MatRedundant" 2058 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2059 { 2060 PetscErrorCode ierr; 2061 PetscContainer container; 2062 Mat_Redundant *redund=PETSC_NULL; 2063 2064 PetscFunctionBegin; 2065 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2066 if (container) { 2067 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2068 } else { 2069 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2070 } 2071 A->ops->destroy = redund->MatDestroy; 2072 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2073 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2074 ierr = 
PetscContainerDestroy(container);CHKERRQ(ierr); 2075 PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2081 { 2082 PetscMPIInt rank,size; 2083 MPI_Comm comm=((PetscObject)mat)->comm; 2084 PetscErrorCode ierr; 2085 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2086 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2087 PetscInt *rowrange=mat->rmap->range; 2088 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2089 Mat A=aij->A,B=aij->B,C=*matredundant; 2090 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2091 PetscScalar *sbuf_a; 2092 PetscInt nzlocal=a->nz+b->nz; 2093 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2094 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2095 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2096 MatScalar *aworkA,*aworkB; 2097 PetscScalar *vals; 2098 PetscMPIInt tag1,tag2,tag3,imdex; 2099 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2100 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2101 MPI_Status recv_status,*send_status; 2102 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2103 PetscInt **rbuf_j=PETSC_NULL; 2104 PetscScalar **rbuf_a=PETSC_NULL; 2105 Mat_Redundant *redund=PETSC_NULL; 2106 PetscContainer container; 2107 2108 PetscFunctionBegin; 2109 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2110 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2111 2112 if (reuse == MAT_REUSE_MATRIX) { 2113 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2114 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2115 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2116 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2117 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2118 if (container) { 2119 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2120 } else { 2121 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2122 } 2123 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2124 2125 nsends = redund->nsends; 2126 nrecvs = redund->nrecvs; 2127 send_rank = redund->send_rank; 2128 recv_rank = redund->recv_rank; 2129 sbuf_nz = redund->sbuf_nz; 2130 rbuf_nz = redund->rbuf_nz; 2131 sbuf_j = redund->sbuf_j; 2132 sbuf_a = redund->sbuf_a; 2133 rbuf_j = redund->rbuf_j; 2134 rbuf_a = redund->rbuf_a; 2135 } 2136 2137 if (reuse == MAT_INITIAL_MATRIX){ 2138 PetscMPIInt subrank,subsize; 2139 PetscInt nleftover,np_subcomm; 2140 /* get the destination processors' id send_rank, nsends and nrecvs */ 2141 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2142 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2143 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2144 np_subcomm = size/nsubcomm; 2145 nleftover = size - nsubcomm*np_subcomm; 2146 nsends = 0; nrecvs = 0; 2147 for (i=0; i<size; i++){ /* i=rank*/ 2148 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2149 send_rank[nsends] = i; nsends++; 2150 recv_rank[nrecvs++] = i; 2151 } 2152 } 2153 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2154 i = size-nleftover-1; 2155 j = 0; 2156 while (j < nsubcomm - nleftover){ 2157 send_rank[nsends++] = i; 2158 i--; j++; 2159 } 2160 } 2161 2162 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2163 for (i=0; i<nleftover; i++){ 2164 recv_rank[nrecvs++] = size-nleftover+i; 2165 } 2166 } 2167 2168 /* allocate sbuf_j, sbuf_a */ 2169 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2170 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2171 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2172 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2173 2174 /* copy mat's local entries into the buffers */ 2175 if (reuse == MAT_INITIAL_MATRIX){ 2176 rownz_max = 0; 2177 rptr = sbuf_j; 2178 cols = sbuf_j + rend-rstart + 1; 2179 vals = sbuf_a; 2180 rptr[0] = 0; 2181 for (i=0; i<rend-rstart; i++){ 2182 row = i + rstart; 2183 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2184 ncols = nzA + nzB; 2185 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2186 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2187 /* load the column indices for this row into cols */ 2188 lwrite = 0; 2189 for (l=0; l<nzB; l++) { 2190 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2191 vals[lwrite] = aworkB[l]; 2192 cols[lwrite++] = ctmp; 2193 } 2194 } 2195 for (l=0; l<nzA; l++){ 2196 vals[lwrite] = aworkA[l]; 2197 cols[lwrite++] = cstart + cworkA[l]; 2198 } 2199 for (l=0; l<nzB; l++) { 2200 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2201 vals[lwrite] = aworkB[l]; 2202 cols[lwrite++] = ctmp; 2203 } 2204 } 2205 vals += ncols; 2206 cols += ncols; 2207 rptr[i+1] = rptr[i] + ncols; 2208 if (rownz_max < ncols) rownz_max = ncols; 2209 } 2210 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2211 } else { /* only copy matrix values into sbuf_a */ 2212 rptr = sbuf_j; 2213 vals = sbuf_a; 2214 rptr[0] = 0; 2215 for (i=0; i<rend-rstart; i++){ 2216 row = i + rstart; 2217 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2218 ncols = nzA + nzB; 2219 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2220 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2221 lwrite = 0; 2222 for (l=0; l<nzB; l++) { 2223 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2224 } 2225 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2226 for 
(l=0; l<nzB; l++) { 2227 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2228 } 2229 vals += ncols; 2230 rptr[i+1] = rptr[i] + ncols; 2231 } 2232 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2233 2234 /* send nzlocal to others, and recv other's nzlocal */ 2235 /*--------------------------------------------------*/ 2236 if (reuse == MAT_INITIAL_MATRIX){ 2237 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2238 s_waits2 = s_waits3 + nsends; 2239 s_waits1 = s_waits2 + nsends; 2240 r_waits1 = s_waits1 + nsends; 2241 r_waits2 = r_waits1 + nrecvs; 2242 r_waits3 = r_waits2 + nrecvs; 2243 } else { 2244 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2245 r_waits3 = s_waits3 + nsends; 2246 } 2247 2248 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2249 if (reuse == MAT_INITIAL_MATRIX){ 2250 /* get new tags to keep the communication clean */ 2251 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2252 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2253 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2254 2255 /* post receives of other's nzlocal */ 2256 for (i=0; i<nrecvs; i++){ 2257 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2258 } 2259 /* send nzlocal to others */ 2260 for (i=0; i<nsends; i++){ 2261 sbuf_nz[i] = nzlocal; 2262 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2263 } 2264 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2265 count = nrecvs; 2266 while (count) { 2267 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2268 recv_rank[imdex] = recv_status.MPI_SOURCE; 2269 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2270 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2271 2272 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2273 rbuf_nz[imdex] += i + 2; 2274 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2275 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2276 count--; 2277 } 2278 /* wait on sends of nzlocal */ 2279 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2280 /* send mat->i,j to others, and recv from other's */ 2281 /*------------------------------------------------*/ 2282 for (i=0; i<nsends; i++){ 2283 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2284 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2285 } 2286 /* wait on receives of mat->i,j */ 2287 /*------------------------------*/ 2288 count = nrecvs; 2289 while (count) { 2290 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2291 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2292 count--; 2293 } 2294 /* wait on sends of mat->i,j */ 2295 /*---------------------------*/ 2296 if (nsends) { 2297 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2298 } 2299 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2300 2301 /* post receives, send and receive mat->a */ 2302 
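  /* (On MAT_REUSE_MATRIX only this value exchange is repeated; the destination ranks and the receive
      buffers cached in Mat_Redundant by the MAT_INITIAL_MATRIX call are reused unchanged.) */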
/*----------------------------------------*/ 2303 for (imdex=0; imdex<nrecvs; imdex++) { 2304 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2305 } 2306 for (i=0; i<nsends; i++){ 2307 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2308 } 2309 count = nrecvs; 2310 while (count) { 2311 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2312 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2313 count--; 2314 } 2315 if (nsends) { 2316 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2317 } 2318 2319 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2320 2321 /* create redundant matrix */ 2322 /*-------------------------*/ 2323 if (reuse == MAT_INITIAL_MATRIX){ 2324 /* compute rownz_max for preallocation */ 2325 for (imdex=0; imdex<nrecvs; imdex++){ 2326 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2327 rptr = rbuf_j[imdex]; 2328 for (i=0; i<j; i++){ 2329 ncols = rptr[i+1] - rptr[i]; 2330 if (rownz_max < ncols) rownz_max = ncols; 2331 } 2332 } 2333 2334 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2335 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2336 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2337 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2338 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2339 } else { 2340 C = *matredundant; 2341 } 2342 2343 /* insert local matrix entries */ 2344 rptr = sbuf_j; 2345 cols = sbuf_j + rend-rstart + 1; 2346 vals = sbuf_a; 2347 for (i=0; i<rend-rstart; i++){ 2348 row = i + rstart; 2349 ncols = rptr[i+1] - rptr[i]; 2350 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2351 vals += ncols; 2352 cols += ncols; 2353 } 2354 /* insert received matrix entries */ 2355 for (imdex=0; imdex<nrecvs; imdex++){ 2356 rstart = rowrange[recv_rank[imdex]]; 2357 rend = rowrange[recv_rank[imdex]+1]; 2358 rptr = rbuf_j[imdex]; 2359 cols = rbuf_j[imdex] + rend-rstart + 1; 2360 vals = rbuf_a[imdex]; 2361 for (i=0; i<rend-rstart; i++){ 2362 row = i + rstart; 2363 ncols = rptr[i+1] - rptr[i]; 2364 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2365 vals += ncols; 2366 cols += ncols; 2367 } 2368 } 2369 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2370 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2371 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2372 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2373 if (reuse == MAT_INITIAL_MATRIX){ 2374 PetscContainer container; 2375 *matredundant = C; 2376 /* create a supporting struct and attach it to C for reuse */ 2377 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2378 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2379 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2380 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2381 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2382 2383 redund->nzlocal = nzlocal; 2384 redund->nsends = nsends; 2385 redund->nrecvs = nrecvs; 2386 redund->send_rank = send_rank; 2387 redund->recv_rank = recv_rank; 
    redund->sbuf_nz   = sbuf_nz;
    redund->rbuf_nz   = rbuf_nz;
    redund->sbuf_j    = sbuf_j;
    redund->sbuf_a    = sbuf_a;
    redund->rbuf_j    = rbuf_j;
    redund->rbuf_a    = rbuf_a;

    redund->MatDestroy = C->ops->destroy;
    C->ops->destroy    = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {
    ierr = PetscFree(idxb);CHKERRQ(ierr);
  }
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {
    ierr = PetscFree(idxb);CHKERRQ(ierr);
  }
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat   = (Mat_MPIAIJ *) A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(((PetscObject)A)->comm, n,
&offdiagV);CHKERRQ(ierr); 2503 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2504 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2505 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2506 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2507 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2508 for(r = 0; r < n; ++r) { 2509 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2510 a[r] = diagA[r]; 2511 idx[r] = cstart + diagIdx[r]; 2512 } else { 2513 a[r] = offdiagA[r]; 2514 idx[r] = cmap[offdiagIdx[r]]; 2515 } 2516 } 2517 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2518 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2519 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2520 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2521 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2522 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2529 { 2530 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2531 PetscInt n = A->rmap->n; 2532 PetscInt cstart = A->cmap->rstart; 2533 PetscInt *cmap = mat->garray; 2534 PetscInt *diagIdx, *offdiagIdx; 2535 Vec diagV, offdiagV; 2536 PetscScalar *a, *diagA, *offdiagA; 2537 PetscInt r; 2538 PetscErrorCode ierr; 2539 2540 PetscFunctionBegin; 2541 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2542 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2543 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2544 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2545 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2546 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2547 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2548 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2549 for(r = 0; r < n; ++r) { 2550 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2551 a[r] = diagA[r]; 2552 idx[r] = cstart + diagIdx[r]; 2553 } else { 2554 a[r] = offdiagA[r]; 2555 idx[r] = cmap[offdiagIdx[r]]; 2556 } 2557 } 2558 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2559 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2560 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2561 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2562 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2563 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2564 PetscFunctionReturn(0); 2565 } 2566 2567 #undef __FUNCT__ 2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2570 { 2571 PetscErrorCode ierr; 2572 Mat *dummy; 2573 2574 PetscFunctionBegin; 2575 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2576 *newmat = *dummy; 2577 ierr = PetscFree(dummy);CHKERRQ(ierr); 2578 PetscFunctionReturn(0); 2579 } 2580 2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2582 /* -------------------------------------------------------------------*/ 2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2584 MatGetRow_MPIAIJ, 2585 MatRestoreRow_MPIAIJ, 2586 MatMult_MPIAIJ, 2587 /* 4*/ MatMultAdd_MPIAIJ, 2588 MatMultTranspose_MPIAIJ, 2589 MatMultTransposeAdd_MPIAIJ, 2590 #ifdef PETSC_HAVE_PBGL 2591 MatSolve_MPIAIJ, 2592 #else 2593 0, 2594 #endif 2595 0, 2596 0, 2597 /*10*/ 0, 2598 0, 2599 0, 2600 
MatSOR_MPIAIJ, 2601 MatTranspose_MPIAIJ, 2602 /*15*/ MatGetInfo_MPIAIJ, 2603 MatEqual_MPIAIJ, 2604 MatGetDiagonal_MPIAIJ, 2605 MatDiagonalScale_MPIAIJ, 2606 MatNorm_MPIAIJ, 2607 /*20*/ MatAssemblyBegin_MPIAIJ, 2608 MatAssemblyEnd_MPIAIJ, 2609 MatSetOption_MPIAIJ, 2610 MatZeroEntries_MPIAIJ, 2611 /*24*/ MatZeroRows_MPIAIJ, 2612 0, 2613 #ifdef PETSC_HAVE_PBGL 2614 0, 2615 #else 2616 0, 2617 #endif 2618 0, 2619 0, 2620 /*29*/ MatSetUpPreallocation_MPIAIJ, 2621 #ifdef PETSC_HAVE_PBGL 2622 0, 2623 #else 2624 0, 2625 #endif 2626 0, 2627 0, 2628 0, 2629 /*34*/ MatDuplicate_MPIAIJ, 2630 0, 2631 0, 2632 0, 2633 0, 2634 /*39*/ MatAXPY_MPIAIJ, 2635 MatGetSubMatrices_MPIAIJ, 2636 MatIncreaseOverlap_MPIAIJ, 2637 MatGetValues_MPIAIJ, 2638 MatCopy_MPIAIJ, 2639 /*44*/ MatGetRowMax_MPIAIJ, 2640 MatScale_MPIAIJ, 2641 0, 2642 0, 2643 0, 2644 /*49*/ MatSetBlockSize_MPIAIJ, 2645 0, 2646 0, 2647 0, 2648 0, 2649 /*54*/ MatFDColoringCreate_MPIAIJ, 2650 0, 2651 MatSetUnfactored_MPIAIJ, 2652 MatPermute_MPIAIJ, 2653 0, 2654 /*59*/ MatGetSubMatrix_MPIAIJ, 2655 MatDestroy_MPIAIJ, 2656 MatView_MPIAIJ, 2657 0, 2658 0, 2659 /*64*/ 0, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2665 MatGetRowMinAbs_MPIAIJ, 2666 0, 2667 MatSetColoring_MPIAIJ, 2668 #if defined(PETSC_HAVE_ADIC) 2669 MatSetValuesAdic_MPIAIJ, 2670 #else 2671 0, 2672 #endif 2673 MatSetValuesAdifor_MPIAIJ, 2674 /*75*/ MatFDColoringApply_AIJ, 2675 0, 2676 0, 2677 0, 2678 0, 2679 /*80*/ 0, 2680 0, 2681 0, 2682 /*83*/ MatLoad_MPIAIJ, 2683 0, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2689 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2690 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2691 MatPtAP_Basic, 2692 MatPtAPSymbolic_MPIAIJ, 2693 /*94*/ MatPtAPNumeric_MPIAIJ, 2694 0, 2695 0, 2696 0, 2697 0, 2698 /*99*/ 0, 2699 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2700 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2701 MatConjugate_MPIAIJ, 2702 0, 2703 /*104*/MatSetValuesRow_MPIAIJ, 2704 MatRealPart_MPIAIJ, 2705 MatImaginaryPart_MPIAIJ, 2706 0, 2707 0, 2708 /*109*/0, 2709 MatGetRedundantMatrix_MPIAIJ, 2710 MatGetRowMin_MPIAIJ, 2711 0, 2712 0, 2713 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2714 0, 2715 0, 2716 0, 2717 0, 2718 /*119*/0, 2719 0, 2720 0, 2721 0, 2722 MatGetMultiProcBlock_MPIAIJ 2723 }; 2724 2725 /* ----------------------------------------------------------------------------------------*/ 2726 2727 EXTERN_C_BEGIN 2728 #undef __FUNCT__ 2729 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2730 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2731 { 2732 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2733 PetscErrorCode ierr; 2734 2735 PetscFunctionBegin; 2736 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2737 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2738 PetscFunctionReturn(0); 2739 } 2740 EXTERN_C_END 2741 2742 EXTERN_C_BEGIN 2743 #undef __FUNCT__ 2744 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2745 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2746 { 2747 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2748 PetscErrorCode ierr; 2749 2750 PetscFunctionBegin; 2751 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2752 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2753 PetscFunctionReturn(0); 2754 } 2755 EXTERN_C_END 2756 2757 EXTERN_C_BEGIN 2758 #undef __FUNCT__ 2759 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2760 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2761 { 2762 Mat_MPIAIJ *b; 2763 PetscErrorCode ierr; 2764 PetscInt 
i; 2765 2766 PetscFunctionBegin; 2767 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2768 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2769 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2770 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2771 2772 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2773 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2774 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2775 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2776 if (d_nnz) { 2777 for (i=0; i<B->rmap->n; i++) { 2778 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2779 } 2780 } 2781 if (o_nnz) { 2782 for (i=0; i<B->rmap->n; i++) { 2783 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2784 } 2785 } 2786 b = (Mat_MPIAIJ*)B->data; 2787 2788 if (!B->preallocated) { 2789 /* Explicitly create 2 MATSEQAIJ matrices. */ 2790 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2791 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2792 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2793 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2794 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2795 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2796 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2797 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2798 } 2799 2800 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2801 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2802 B->preallocated = PETSC_TRUE; 2803 PetscFunctionReturn(0); 2804 } 2805 EXTERN_C_END 2806 2807 #undef __FUNCT__ 2808 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2809 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2810 { 2811 Mat mat; 2812 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2813 PetscErrorCode ierr; 2814 2815 PetscFunctionBegin; 2816 *newmat = 0; 2817 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2818 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2819 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2820 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2821 a = (Mat_MPIAIJ*)mat->data; 2822 2823 mat->factortype = matin->factortype; 2824 mat->rmap->bs = matin->rmap->bs; 2825 mat->assembled = PETSC_TRUE; 2826 mat->insertmode = NOT_SET_VALUES; 2827 mat->preallocated = PETSC_TRUE; 2828 2829 a->size = oldmat->size; 2830 a->rank = oldmat->rank; 2831 a->donotstash = oldmat->donotstash; 2832 a->roworiented = oldmat->roworiented; 2833 a->rowindices = 0; 2834 a->rowvalues = 0; 2835 a->getrowactive = PETSC_FALSE; 2836 2837 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2838 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2839 2840 if (oldmat->colmap) { 2841 #if defined (PETSC_USE_CTABLE) 2842 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2843 #else 2844 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2845 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2846 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/*
    Allows sending/receiving messages larger than 2 gigabytes in a single call
*/
static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
{
  int             ierr;
  static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
  PetscInt        i,numchunks;
  PetscMPIInt     icnt;

  numchunks = cnt/CHUNKSIZE + 1;
  for (i=0; i<numchunks; i++) {
    icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
    ierr = MPI_Send(mess,icnt,type,to,tag,comm);
    if (type == MPIU_INT) {
      mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
    } else if (type == MPIU_SCALAR) {
      mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
    } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
  }
  return 0;
}
static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
{
  int             ierr;
  static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
  MPI_Status      status;
  PetscInt        i,numchunks;
  PetscMPIInt     icnt;

  numchunks = cnt/CHUNKSIZE + 1;
  for (i=0; i<numchunks; i++) {
    icnt = PetscMPIIntCast((i < numchunks-1) ?
CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 2903 ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status); 2904 if (type == MPIU_INT) { 2905 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 2906 } else if (type == MPIU_SCALAR) { 2907 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 2908 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 2909 } 2910 return 0; 2911 } 2912 2913 #undef __FUNCT__ 2914 #define __FUNCT__ "MatLoad_MPIAIJ" 2915 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2916 { 2917 PetscScalar *vals,*svals; 2918 MPI_Comm comm = ((PetscObject)viewer)->comm; 2919 PetscErrorCode ierr; 2920 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2921 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 2922 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2923 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2924 PetscInt cend,cstart,n,*rowners,sizesset=1; 2925 int fd; 2926 2927 PetscFunctionBegin; 2928 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2929 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2930 if (!rank) { 2931 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2932 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2933 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2934 } 2935 2936 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 2937 2938 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2939 M = header[1]; N = header[2]; 2940 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2941 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 2942 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 2943 2944 /* If global sizes are set, check if they are consistent with that given in the file */ 2945 if (sizesset) { 2946 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 2947 } 2948 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2949 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2950 2951 /* determine ownership of all rows */ 2952 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 2953 else m = newMat->rmap->n; /* Set by user */ 2954 2955 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2956 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2957 2958 /* First process needs enough room for process with most rows */ 2959 if (!rank) { 2960 mmax = rowners[1]; 2961 for (i=2; i<size; i++) { 2962 mmax = PetscMax(mmax,rowners[i]); 2963 } 2964 } else mmax = m; 2965 2966 rowners[0] = 0; 2967 for (i=2; i<=size; i++) { 2968 rowners[i] += rowners[i-1]; 2969 } 2970 rstart = rowners[rank]; 2971 rend = rowners[rank+1]; 2972 2973 /* distribute row lengths to all processors */ 2974 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2975 if (!rank) { 2976 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2977 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2978 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2979 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2980 for (j=0; 
j<m; j++) { 2981 procsnz[0] += ourlens[j]; 2982 } 2983 for (i=1; i<size; i++) { 2984 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2985 /* calculate the number of nonzeros on each processor */ 2986 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2987 procsnz[i] += rowlengths[j]; 2988 } 2989 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2990 } 2991 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2992 } else { 2993 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2994 } 2995 2996 if (!rank) { 2997 /* determine max buffer needed and allocate it */ 2998 maxnz = 0; 2999 for (i=0; i<size; i++) { 3000 maxnz = PetscMax(maxnz,procsnz[i]); 3001 } 3002 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3003 3004 /* read in my part of the matrix column indices */ 3005 nz = procsnz[0]; 3006 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3007 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3008 3009 /* read in every one elses and ship off */ 3010 for (i=1; i<size; i++) { 3011 nz = procsnz[i]; 3012 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3013 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3014 } 3015 ierr = PetscFree(cols);CHKERRQ(ierr); 3016 } else { 3017 /* determine buffer space needed for message */ 3018 nz = 0; 3019 for (i=0; i<m; i++) { 3020 nz += ourlens[i]; 3021 } 3022 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3023 3024 /* receive message of column indices*/ 3025 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3026 } 3027 3028 /* determine column ownership if matrix is not square */ 3029 if (N != M) { 3030 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3031 else n = newMat->cmap->n; 3032 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3033 cstart = cend - n; 3034 } else { 3035 cstart = rstart; 3036 cend = rend; 3037 n = cend - cstart; 3038 } 3039 3040 /* loop over local rows, determining number of off diagonal entries */ 3041 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3042 jj = 0; 3043 for (i=0; i<m; i++) { 3044 for (j=0; j<ourlens[i]; j++) { 3045 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3046 jj++; 3047 } 3048 } 3049 3050 for (i=0; i<m; i++) { 3051 ourlens[i] -= offlens[i]; 3052 } 3053 if (!sizesset) { 3054 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3055 } 3056 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3057 3058 for (i=0; i<m; i++) { 3059 ourlens[i] += offlens[i]; 3060 } 3061 3062 if (!rank) { 3063 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3064 3065 /* read in my part of the matrix numerical values */ 3066 nz = procsnz[0]; 3067 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3068 3069 /* insert into matrix */ 3070 jj = rstart; 3071 smycols = mycols; 3072 svals = vals; 3073 for (i=0; i<m; i++) { 3074 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3075 smycols += ourlens[i]; 3076 svals += ourlens[i]; 3077 jj++; 3078 } 3079 3080 /* read in other processors and ship out */ 3081 for (i=1; i<size; i++) { 3082 nz = procsnz[i]; 3083 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3084 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3085 } 3086 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3087 } else { 3088 /* receive numeric values */ 3089 ierr 
= PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3090 3091 /* receive message of values*/ 3092 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3093 3094 /* insert into matrix */ 3095 jj = rstart; 3096 smycols = mycols; 3097 svals = vals; 3098 for (i=0; i<m; i++) { 3099 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3100 smycols += ourlens[i]; 3101 svals += ourlens[i]; 3102 jj++; 3103 } 3104 } 3105 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3106 ierr = PetscFree(vals);CHKERRQ(ierr); 3107 ierr = PetscFree(mycols);CHKERRQ(ierr); 3108 ierr = PetscFree(rowners);CHKERRQ(ierr); 3109 3110 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3111 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3112 PetscFunctionReturn(0); 3113 } 3114 3115 #undef __FUNCT__ 3116 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3117 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3118 { 3119 PetscErrorCode ierr; 3120 IS iscol_local; 3121 PetscInt csize; 3122 3123 PetscFunctionBegin; 3124 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3125 if (call == MAT_REUSE_MATRIX) { 3126 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3127 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3128 } else { 3129 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3130 } 3131 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3132 if (call == MAT_INITIAL_MATRIX) { 3133 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3134 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3135 } 3136 PetscFunctionReturn(0); 3137 } 3138 3139 #undef __FUNCT__ 3140 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3141 /* 3142 Not great since it makes two copies of the submatrix, first an SeqAIJ 3143 in local and then by concatenating the local matrices the end result. 3144 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3145 3146 Note: This requires a sequential iscol with all indices. 
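
      As an illustrative sketch, a caller is expected to build such an iscol by gathering the
      parallel column IS onto every process, which is exactly what MatGetSubMatrix_MPIAIJ()
      above does before calling this routine:

         ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
         ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);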
3147 */ 3148 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3149 { 3150 PetscErrorCode ierr; 3151 PetscMPIInt rank,size; 3152 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3153 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3154 Mat *local,M,Mreuse; 3155 MatScalar *vwork,*aa; 3156 MPI_Comm comm = ((PetscObject)mat)->comm; 3157 Mat_SeqAIJ *aij; 3158 3159 3160 PetscFunctionBegin; 3161 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3162 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3163 3164 if (call == MAT_REUSE_MATRIX) { 3165 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3166 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3167 local = &Mreuse; 3168 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3169 } else { 3170 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3171 Mreuse = *local; 3172 ierr = PetscFree(local);CHKERRQ(ierr); 3173 } 3174 3175 /* 3176 m - number of local rows 3177 n - number of columns (same on all processors) 3178 rstart - first row in new global matrix generated 3179 */ 3180 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3181 if (call == MAT_INITIAL_MATRIX) { 3182 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3183 ii = aij->i; 3184 jj = aij->j; 3185 3186 /* 3187 Determine the number of non-zeros in the diagonal and off-diagonal 3188 portions of the matrix in order to do correct preallocation 3189 */ 3190 3191 /* first get start and end of "diagonal" columns */ 3192 if (csize == PETSC_DECIDE) { 3193 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3194 if (mglobal == n) { /* square matrix */ 3195 nlocal = m; 3196 } else { 3197 nlocal = n/size + ((n % size) > rank); 3198 } 3199 } else { 3200 nlocal = csize; 3201 } 3202 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3203 rstart = rend - nlocal; 3204 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3205 3206 /* next, compute all the lengths */ 3207 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3208 olens = dlens + m; 3209 for (i=0; i<m; i++) { 3210 jend = ii[i+1] - ii[i]; 3211 olen = 0; 3212 dlen = 0; 3213 for (j=0; j<jend; j++) { 3214 if (*jj < rstart || *jj >= rend) olen++; 3215 else dlen++; 3216 jj++; 3217 } 3218 olens[i] = olen; 3219 dlens[i] = dlen; 3220 } 3221 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3222 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3223 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3224 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3225 ierr = PetscFree(dlens);CHKERRQ(ierr); 3226 } else { 3227 PetscInt ml,nl; 3228 3229 M = *newmat; 3230 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3231 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3232 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3233 /* 3234 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3235 rather than the slower MatSetValues(). 
3236 */
3237 M->was_assembled = PETSC_TRUE;
3238 M->assembled = PETSC_FALSE;
3239 }
3240 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3241 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3242 ii = aij->i;
3243 jj = aij->j;
3244 aa = aij->a;
3245 for (i=0; i<m; i++) {
3246 row = rstart + i;
3247 nz = ii[i+1] - ii[i];
3248 cwork = jj; jj += nz;
3249 vwork = aa; aa += nz;
3250 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3251 }
3252
3253 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3254 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3255 *newmat = M;
3256
3257 /* save submatrix used in processor for next request */
3258 if (call == MAT_INITIAL_MATRIX) {
3259 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3260 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3261 }
3262
3263 PetscFunctionReturn(0);
3264 }
3265
3266 EXTERN_C_BEGIN
3267 #undef __FUNCT__
3268 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3269 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3270 {
3271 PetscInt m,cstart, cend,j,nnz,i,d;
3272 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3273 const PetscInt *JJ;
3274 PetscScalar *values;
3275 PetscErrorCode ierr;
3276
3277 PetscFunctionBegin;
3278 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3279
3280 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3281 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3282 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3283 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3284 m = B->rmap->n;
3285 cstart = B->cmap->rstart;
3286 cend = B->cmap->rend;
3287 rstart = B->rmap->rstart;
3288
3289 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3290
3291 #if defined(PETSC_USE_DEBUG)
3292 for (i=0; i<m; i++) {
3293 nnz = Ii[i+1]- Ii[i];
3294 JJ = J + Ii[i];
3295 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3296 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3297 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3298 }
3299 #endif
3300
3301 for (i=0; i<m; i++) {
3302 nnz = Ii[i+1]- Ii[i];
3303 JJ = J + Ii[i];
3304 nnz_max = PetscMax(nnz_max,nnz);
3305 d = 0;
3306 for (j=0; j<nnz; j++) {
3307 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3308 }
3309 d_nnz[i] = d;
3310 o_nnz[i] = nnz - d;
3311 }
3312 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3313 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3314
3315 if (v) values = (PetscScalar*)v;
3316 else {
3317 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3318 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3319 }
3320
3321 for (i=0; i<m; i++) {
3322 ii = i + rstart;
3323 nnz = Ii[i+1]- Ii[i];
3324 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3325 }
3326 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3327 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3328
3329 if (!v) {
3330 ierr = PetscFree(values);CHKERRQ(ierr);
3331 }
3332 PetscFunctionReturn(0);
3333 }
3334 EXTERN_C_END
3335
3336 #undef __FUNCT__
3337 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3338 /*@
3339 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3340 (the default parallel PETSc format).
3341
3342 Collective on MPI_Comm
3343
3344 Input Parameters:
3345 + B - the matrix
3346 . i - the indices into j for the start of each local row (starts with zero)
3347 . j - the column indices for each local row (starts with zero)
3348 - v - optional values in the matrix
3349
3350 Level: developer
3351
3352 Notes:
3353 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3354 thus you CANNOT change the matrix entries by changing the values of v[] after you have
3355 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3356
3357 The i and j indices are 0 based, and the i indices are offsets into the local j array.
3358
3359 The format used for the sparse matrix input is equivalent to a
3360 row-major ordering, i.e., for the following matrix, the input data expected is
3361 as shown:
3362
3363 1 0 0
3364 2 0 3 P0
3365 -------
3366 4 5 6 P1
3367
3368 Process0 [P0]: rows_owned=[0,1]
3369 i = {0,1,3} [size = nrow+1 = 2+1]
3370 j = {0,0,2} [size = nz = 3]
3371 v = {1,2,3} [size = nz = 3]
3372
3373 Process1 [P1]: rows_owned=[2]
3374 i = {0,3} [size = nrow+1 = 1+1]
3375 j = {0,1,2} [size = nz = 3]
3376 v = {4,5,6} [size = nz = 3]
3377
3378 .keywords: matrix, aij, compressed row, sparse, parallel
3379
3380 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3381 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3382 @*/
3383 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3384 {
3385 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3386
3387 PetscFunctionBegin;
3388 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3389 if (f) {
3390 ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3391 }
3392 PetscFunctionReturn(0);
3393 }
3394
3395 #undef __FUNCT__
3396 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3397 /*@C
3398 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3399 (the default parallel PETSc format). For good matrix assembly performance
3400 the user should preallocate the matrix storage by setting the parameters
3401 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3402 performance can be increased by more than a factor of 50.
3403
3404 Collective on MPI_Comm
3405
3406 Input Parameters:
3407 + A - the matrix
3408 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3409 (same value is used for all local rows)
3410 . d_nnz - array containing the number of nonzeros in the various rows of the
3411 DIAGONAL portion of the local submatrix (possibly different for each row)
3412 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3413 The size of this array is equal to the number of local rows, i.e., 'm'.
3414 You must leave room for the diagonal entry even if it is zero. 3415 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3416 submatrix (same value is used for all local rows). 3417 - o_nnz - array containing the number of nonzeros in the various rows of the 3418 OFF-DIAGONAL portion of the local submatrix (possibly different for 3419 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3420 structure. The size of this array is equal to the number 3421 of local rows, i.e 'm'. 3422 3423 If the *_nnz parameter is given then the *_nz parameter is ignored 3424 3425 The AIJ format (also called the Yale sparse matrix format or 3426 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3427 storage. The stored row and column indices begin with zero. 3428 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3429 3430 The parallel matrix is partitioned such that the first m0 rows belong to 3431 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3432 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3433 3434 The DIAGONAL portion of the local submatrix of a processor can be defined 3435 as the submatrix which is obtained by extraction the part corresponding to 3436 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3437 first row that belongs to the processor, r2 is the last row belonging to 3438 the this processor, and c1-c2 is range of indices of the local part of a 3439 vector suitable for applying the matrix to. This is an mxn matrix. In the 3440 common case of a square matrix, the row and column ranges are the same and 3441 the DIAGONAL part is also square. The remaining portion of the local 3442 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3443 3444 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3445 3446 You can call MatGetInfo() to get information on how effective the preallocation was; 3447 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3448 You can also run with the option -info and look for messages with the string 3449 malloc in them to see if additional memory allocation was needed. 3450 3451 Example usage: 3452 3453 Consider the following 8x8 matrix with 34 non-zero values, that is 3454 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3455 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3456 as follows: 3457 3458 .vb 3459 1 2 0 | 0 3 0 | 0 4 3460 Proc0 0 5 6 | 7 0 0 | 8 0 3461 9 0 10 | 11 0 0 | 12 0 3462 ------------------------------------- 3463 13 0 14 | 15 16 17 | 0 0 3464 Proc1 0 18 0 | 19 20 21 | 0 0 3465 0 0 0 | 22 23 0 | 24 0 3466 ------------------------------------- 3467 Proc2 25 26 27 | 0 0 28 | 29 0 3468 30 0 0 | 31 32 33 | 0 34 3469 .ve 3470 3471 This can be represented as a collection of submatrices as: 3472 3473 .vb 3474 A B C 3475 D E F 3476 G H I 3477 .ve 3478 3479 Where the submatrices A,B,C are owned by proc0, D,E,F are 3480 owned by proc1, G,H,I are owned by proc2. 3481 3482 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3483 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3484 The 'M','N' parameters are 8,8, and have the same values on all procs. 3485 3486 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3487 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3488 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
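
   For the layout above, the recommended way to reach this routine is the
   MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() sequence; as an illustrative
   sketch (error checking omitted, with the per-row counts derived below):

.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
   /* then MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() */
.ve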
3489 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3490 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3491 matrix, and [DF] as another SeqAIJ matrix.
3492
3493 When d_nz, o_nz parameters are specified, d_nz storage elements are
3494 allocated for every row of the local diagonal submatrix, and o_nz
3495 storage locations are allocated for every row of the OFF-DIAGONAL submat.
3496 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3497 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3498 In this case, the values of d_nz,o_nz are:
3499 .vb
3500 proc0 : d_nz = 2, o_nz = 2
3501 proc1 : d_nz = 3, o_nz = 2
3502 proc2 : d_nz = 1, o_nz = 4
3503 .ve
3504 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3505 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3506 for proc2, i.e. we are using 12+15+10=37 storage locations to store
3507 34 values.
3508
3509 When d_nnz, o_nnz parameters are specified, the storage is specified
3510 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3511 In the above case the values for d_nnz,o_nnz are:
3512 .vb
3513 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3514 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3515 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3516 .ve
3517 Here the space allocated is the sum of all the above values, i.e. 34, and
3518 hence preallocation is perfect.
3519
3520 Level: intermediate
3521
3522 .keywords: matrix, aij, compressed row, sparse, parallel
3523
3524 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3525 MPIAIJ, MatGetInfo()
3526 @*/
3527 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3528 {
3529 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3530
3531 PetscFunctionBegin;
3532 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3533 if (f) {
3534 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3535 }
3536 PetscFunctionReturn(0);
3537 }
3538
3539 #undef __FUNCT__
3540 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3541 /*@
3542 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the
3543 local rows in standard CSR format.
3544
3545 Collective on MPI_Comm
3546
3547 Input Parameters:
3548 + comm - MPI communicator
3549 . m - number of local rows (Cannot be PETSC_DECIDE)
3550 . n - This value should be the same as the local size used in creating the
3551 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3552 calculated if N is given) For square matrices n is almost always m.
3553 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3554 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3555 . i - row indices
3556 . j - column indices
3557 - a - matrix values
3558
3559 Output Parameter:
3560 . mat - the matrix
3561
3562 Level: intermediate
3563
3564 Notes:
3565 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3566 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3567 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3568
3569 The i and j indices are 0 based, and the i indices are offsets into the local j array.
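
   As an illustrative sketch based on the two-process example below, process 0 (which owns
   the first two rows of the 3x3 matrix) would call:

.vb
   PetscInt    i[] = {0,1,3};
   PetscInt    j[] = {0,0,2};
   PetscScalar v[] = {1,2,3};
   Mat         A;
   MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve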
3570
3571 The format used for the sparse matrix input is equivalent to a
3572 row-major ordering, i.e., for the following matrix, the input data expected is
3573 as shown:
3574
3575 1 0 0
3576 2 0 3 P0
3577 -------
3578 4 5 6 P1
3579
3580 Process0 [P0]: rows_owned=[0,1]
3581 i = {0,1,3} [size = nrow+1 = 2+1]
3582 j = {0,0,2} [size = nz = 3]
3583 v = {1,2,3} [size = nz = 3]
3584
3585 Process1 [P1]: rows_owned=[2]
3586 i = {0,3} [size = nrow+1 = 1+1]
3587 j = {0,1,2} [size = nz = 3]
3588 v = {4,5,6} [size = nz = 3]
3589
3590 .keywords: matrix, aij, compressed row, sparse, parallel
3591
3592 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3593 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3594 @*/
3595 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3596 {
3597 PetscErrorCode ierr;
3598
3599 PetscFunctionBegin;
3600 if (i[0]) {
3601 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3602 }
3603 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3604 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3605 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3606 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3607 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3608 PetscFunctionReturn(0);
3609 }
3610
3611 #undef __FUNCT__
3612 #define __FUNCT__ "MatCreateMPIAIJ"
3613 /*@C
3614 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3615 (the default parallel PETSc format). For good matrix assembly performance
3616 the user should preallocate the matrix storage by setting the parameters
3617 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3618 performance can be increased by more than a factor of 50.
3619
3620 Collective on MPI_Comm
3621
3622 Input Parameters:
3623 + comm - MPI communicator
3624 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3625 This value should be the same as the local size used in creating the
3626 y vector for the matrix-vector product y = Ax.
3627 . n - This value should be the same as the local size used in creating the
3628 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3629 calculated if N is given) For square matrices n is almost always m.
3630 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3631 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3632 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3633 (same value is used for all local rows)
3634 . d_nnz - array containing the number of nonzeros in the various rows of the
3635 DIAGONAL portion of the local submatrix (possibly different for each row)
3636 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3637 The size of this array is equal to the number of local rows, i.e., 'm'.
3638 You must leave room for the diagonal entry even if it is zero.
3639 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3640 submatrix (same value is used for all local rows).
3641 - o_nnz - array containing the number of nonzeros in the various rows of the
3642 OFF-DIAGONAL portion of the local submatrix (possibly different for
3643 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3644 structure. The size of this array is equal to the number
3645 of local rows, i.e., 'm'.
3646
3647 Output Parameter:
3648 . A - the matrix
3649
3650 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3651 MatXXXXSetPreallocation() paradigm instead of this routine directly.
3652 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3653
3654 Notes:
3655 If the *_nnz parameter is given then the *_nz parameter is ignored.
3656
3657 The m,n,M,N parameters specify the size of the matrix, and its partitioning across
3658 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3659 storage requirements for this matrix.
3660
3661 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3662 processor then it must be used on all processors that share the object for
3663 that argument.
3664
3665 The user MUST specify either the local or global matrix dimensions
3666 (possibly both).
3667
3668 The parallel matrix is partitioned across processors such that the
3669 first m0 rows belong to process 0, the next m1 rows belong to
3670 process 1, the next m2 rows belong to process 2, etc., where
3671 m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
3672 values corresponding to an [m x N] submatrix.
3673
3674 The columns are logically partitioned with the n0 columns belonging
3675 to the 0th partition, the next n1 columns belonging to the next
3676 partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3677
3678 The DIAGONAL portion of the local submatrix on any given processor
3679 is the submatrix corresponding to the rows and columns m,n
3680 corresponding to the given processor, i.e. the diagonal matrix on
3681 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3682 etc. The remaining portion of the local submatrix [m x (N-n)]
3683 constitutes the OFF-DIAGONAL portion. The example below better
3684 illustrates this concept.
3685
3686 For a square global matrix we define each processor's diagonal portion
3687 to be its local rows and the corresponding columns (a square submatrix);
3688 each processor's off-diagonal portion encompasses the remainder of the
3689 local matrix (a rectangular submatrix).
3690
3691 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3692
3693 When calling this routine with a single process communicator, a matrix of
3694 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
3695 type of communicator, use the construction mechanism:
3696 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3697
3698 By default, this format uses inodes (identical nodes) when possible.
3699 We search for consecutive rows with the same nonzero structure, thereby
3700 reusing matrix information to achieve increased efficiency.
3701
3702 Options Database Keys:
3703 + -mat_no_inode - Do not use inodes
3704 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3705 - -mat_aij_oneindex - Internally use indexing starting at 1
3706 rather than 0. Note that when calling MatSetValues(),
3707 the user still MUST index entries starting at 0!
3708
3709
3710 Example usage:
3711
3712 Consider the following 8x8 matrix with 34 non-zero values that is
3713 assembled across 3 processors.
Let us assume that proc0 owns 3 rows,
3714 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3715 as follows:
3716
3717 .vb
3718 1 2 0 | 0 3 0 | 0 4
3719 Proc0 0 5 6 | 7 0 0 | 8 0
3720 9 0 10 | 11 0 0 | 12 0
3721 -------------------------------------
3722 13 0 14 | 15 16 17 | 0 0
3723 Proc1 0 18 0 | 19 20 21 | 0 0
3724 0 0 0 | 22 23 0 | 24 0
3725 -------------------------------------
3726 Proc2 25 26 27 | 0 0 28 | 29 0
3727 30 0 0 | 31 32 33 | 0 34
3728 .ve
3729
3730 This can be represented as a collection of submatrices as:
3731
3732 .vb
3733 A B C
3734 D E F
3735 G H I
3736 .ve
3737
3738 Where the submatrices A,B,C are owned by proc0, D,E,F are
3739 owned by proc1, G,H,I are owned by proc2.
3740
3741 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3742 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3743 The 'M','N' parameters are 8,8, and have the same values on all procs.
3744
3745 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3746 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3747 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3748 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3749 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3750 matrix, and [DF] as another SeqAIJ matrix.
3751
3752 When d_nz, o_nz parameters are specified, d_nz storage elements are
3753 allocated for every row of the local diagonal submatrix, and o_nz
3754 storage locations are allocated for every row of the OFF-DIAGONAL submat.
3755 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3756 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3757 In this case, the values of d_nz,o_nz are:
3758 .vb
3759 proc0 : d_nz = 2, o_nz = 2
3760 proc1 : d_nz = 3, o_nz = 2
3761 proc2 : d_nz = 1, o_nz = 4
3762 .ve
3763 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3764 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3765 for proc2, i.e. we are using 12+15+10=37 storage locations to store
3766 34 values.
3767
3768 When d_nnz, o_nnz parameters are specified, the storage is specified
3769 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3770 In the above case the values for d_nnz,o_nnz are:
3771 .vb
3772 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3773 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3774 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3775 .ve
3776 Here the space allocated is the sum of all the above values, i.e. 34, and
3777 hence preallocation is perfect.
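
   As an illustrative sketch, proc1 in the example above could therefore create its part of
   the matrix with:

.vb
   PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
   Mat      A;
   MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
   /* then MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() */
.ve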
3778 3779 Level: intermediate 3780 3781 .keywords: matrix, aij, compressed row, sparse, parallel 3782 3783 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3784 MPIAIJ, MatCreateMPIAIJWithArrays() 3785 @*/ 3786 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3787 { 3788 PetscErrorCode ierr; 3789 PetscMPIInt size; 3790 3791 PetscFunctionBegin; 3792 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3793 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3794 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3795 if (size > 1) { 3796 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3797 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3798 } else { 3799 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3800 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3801 } 3802 PetscFunctionReturn(0); 3803 } 3804 3805 #undef __FUNCT__ 3806 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3807 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3808 { 3809 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3810 3811 PetscFunctionBegin; 3812 *Ad = a->A; 3813 *Ao = a->B; 3814 *colmap = a->garray; 3815 PetscFunctionReturn(0); 3816 } 3817 3818 #undef __FUNCT__ 3819 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3820 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3821 { 3822 PetscErrorCode ierr; 3823 PetscInt i; 3824 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3825 3826 PetscFunctionBegin; 3827 if (coloring->ctype == IS_COLORING_GLOBAL) { 3828 ISColoringValue *allcolors,*colors; 3829 ISColoring ocoloring; 3830 3831 /* set coloring for diagonal portion */ 3832 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3833 3834 /* set coloring for off-diagonal portion */ 3835 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3836 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3837 for (i=0; i<a->B->cmap->n; i++) { 3838 colors[i] = allcolors[a->garray[i]]; 3839 } 3840 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3841 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3842 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3843 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3844 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3845 ISColoringValue *colors; 3846 PetscInt *larray; 3847 ISColoring ocoloring; 3848 3849 /* set coloring for diagonal portion */ 3850 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3851 for (i=0; i<a->A->cmap->n; i++) { 3852 larray[i] = i + A->cmap->rstart; 3853 } 3854 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3855 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3856 for (i=0; i<a->A->cmap->n; i++) { 3857 colors[i] = coloring->colors[larray[i]]; 3858 } 3859 ierr = PetscFree(larray);CHKERRQ(ierr); 3860 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3861 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3862 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3863 3864 /* set coloring for off-diagonal portion */ 3865 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3866 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3867 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3868 for (i=0; i<a->B->cmap->n; i++) { 3869 colors[i] = coloring->colors[larray[i]]; 3870 } 3871 ierr = PetscFree(larray);CHKERRQ(ierr); 3872 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3873 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3874 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3875 } else { 3876 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3877 } 3878 3879 PetscFunctionReturn(0); 3880 } 3881 3882 #if defined(PETSC_HAVE_ADIC) 3883 #undef __FUNCT__ 3884 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3885 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3886 { 3887 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3888 PetscErrorCode ierr; 3889 3890 PetscFunctionBegin; 3891 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3892 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3893 PetscFunctionReturn(0); 3894 } 3895 #endif 3896 3897 #undef __FUNCT__ 3898 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3899 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3900 { 3901 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3902 PetscErrorCode ierr; 3903 3904 PetscFunctionBegin; 3905 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3906 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3907 PetscFunctionReturn(0); 3908 } 3909 3910 #undef __FUNCT__ 3911 #define __FUNCT__ "MatMerge" 3912 /*@ 3913 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3914 matrices from each processor 3915 3916 Collective on MPI_Comm 3917 3918 Input Parameters: 3919 + comm - the communicators the parallel matrix will live on 3920 . inmat - the input sequential matrices 3921 . n - number of local columns (or PETSC_DECIDE) 3922 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3923 3924 Output Parameter: 3925 . outmat - the parallel matrix generated 3926 3927 Level: advanced 3928 3929 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
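
   As an illustrative sketch (each process passes its own SeqAIJ block of rows; note that
   the implementation below destroys inmat once it has been merged):

.vb
   Mat seqA;   /* SeqAIJ matrix holding this process's rows; same number of columns on every process */
   Mat mpiA;
   /* ... assemble seqA ... */
   MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);
.ve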
3930 3931 @*/ 3932 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3933 { 3934 PetscErrorCode ierr; 3935 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3936 PetscInt *indx; 3937 PetscScalar *values; 3938 3939 PetscFunctionBegin; 3940 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3941 if (scall == MAT_INITIAL_MATRIX){ 3942 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3943 if (n == PETSC_DECIDE){ 3944 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3945 } 3946 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3947 rstart -= m; 3948 3949 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3950 for (i=0;i<m;i++) { 3951 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3952 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3953 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3954 } 3955 /* This routine will ONLY return MPIAIJ type matrix */ 3956 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3957 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3958 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3959 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3960 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3961 3962 } else if (scall == MAT_REUSE_MATRIX){ 3963 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3964 } else { 3965 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3966 } 3967 3968 for (i=0;i<m;i++) { 3969 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3970 Ii = i + rstart; 3971 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3972 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3973 } 3974 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3975 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3976 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3977 3978 PetscFunctionReturn(0); 3979 } 3980 3981 #undef __FUNCT__ 3982 #define __FUNCT__ "MatFileSplit" 3983 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3984 { 3985 PetscErrorCode ierr; 3986 PetscMPIInt rank; 3987 PetscInt m,N,i,rstart,nnz; 3988 size_t len; 3989 const PetscInt *indx; 3990 PetscViewer out; 3991 char *name; 3992 Mat B; 3993 const PetscScalar *values; 3994 3995 PetscFunctionBegin; 3996 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3997 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3998 /* Should this be the type of the diagonal block of A? 
*/ 3999 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4000 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4001 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4002 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4003 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4004 for (i=0;i<m;i++) { 4005 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4006 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4007 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4008 } 4009 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4010 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4011 4012 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4013 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4014 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4015 sprintf(name,"%s.%d",outfile,rank); 4016 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4017 ierr = PetscFree(name); 4018 ierr = MatView(B,out);CHKERRQ(ierr); 4019 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 4020 ierr = MatDestroy(B);CHKERRQ(ierr); 4021 PetscFunctionReturn(0); 4022 } 4023 4024 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 4025 #undef __FUNCT__ 4026 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4027 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4028 { 4029 PetscErrorCode ierr; 4030 Mat_Merge_SeqsToMPI *merge; 4031 PetscContainer container; 4032 4033 PetscFunctionBegin; 4034 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4035 if (container) { 4036 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4037 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4038 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4039 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4040 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4041 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4042 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4043 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4044 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4045 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4046 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4047 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4048 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4049 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4050 4051 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4052 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4053 } 4054 ierr = PetscFree(merge);CHKERRQ(ierr); 4055 4056 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4057 PetscFunctionReturn(0); 4058 } 4059 4060 #include "../src/mat/utils/freespace.h" 4061 #include "petscbt.h" 4062 4063 #undef __FUNCT__ 4064 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4065 /*@C 4066 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4067 matrices from each processor 4068 4069 Collective on MPI_Comm 4070 4071 Input Parameters: 4072 + comm - the communicators the parallel matrix will live on 4073 . seqmat - the input sequential matrices 4074 . m - number of local rows (or PETSC_DECIDE) 4075 . n - number of local columns (or PETSC_DECIDE) 4076 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4077 4078 Output Parameter: 4079 . mpimat - the parallel matrix generated 4080 4081 Level: advanced 4082 4083 Notes: 4084 The dimensions of the sequential matrix in each processor MUST be the same. 
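
   The nonzero structure and the numerical values are handled in two phases; the typical
   sequence (this is what MatMerge_SeqsToMPI() below does) is, as a sketch:

.vb
   MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);   /* once, to build the parallel structure */
   MatMerge_SeqsToMPINumeric(seqmat,mpimat);              /* whenever the numerical values change */
.ve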
4085 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4086 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4087 @*/ 4088 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4089 { 4090 PetscErrorCode ierr; 4091 MPI_Comm comm=((PetscObject)mpimat)->comm; 4092 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4093 PetscMPIInt size,rank,taga,*len_s; 4094 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4095 PetscInt proc,m; 4096 PetscInt **buf_ri,**buf_rj; 4097 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4098 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4099 MPI_Request *s_waits,*r_waits; 4100 MPI_Status *status; 4101 MatScalar *aa=a->a; 4102 MatScalar **abuf_r,*ba_i; 4103 Mat_Merge_SeqsToMPI *merge; 4104 PetscContainer container; 4105 4106 PetscFunctionBegin; 4107 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4108 4109 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4110 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4111 4112 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4113 if (container) { 4114 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4115 } 4116 bi = merge->bi; 4117 bj = merge->bj; 4118 buf_ri = merge->buf_ri; 4119 buf_rj = merge->buf_rj; 4120 4121 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4122 owners = merge->rowmap->range; 4123 len_s = merge->len_s; 4124 4125 /* send and recv matrix values */ 4126 /*-----------------------------*/ 4127 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4128 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4129 4130 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4131 for (proc=0,k=0; proc<size; proc++){ 4132 if (!len_s[proc]) continue; 4133 i = owners[proc]; 4134 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4135 k++; 4136 } 4137 4138 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4139 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4140 ierr = PetscFree(status);CHKERRQ(ierr); 4141 4142 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4143 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4144 4145 /* insert mat values of mpimat */ 4146 /*----------------------------*/ 4147 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4148 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4149 4150 for (k=0; k<merge->nrecv; k++){ 4151 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4152 nrows = *(buf_ri_k[k]); 4153 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4154 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4155 } 4156 4157 /* set values of ba */ 4158 m = merge->rowmap->n; 4159 for (i=0; i<m; i++) { 4160 arow = owners[rank] + i; 4161 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4162 bnzi = bi[i+1] - bi[i]; 4163 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4164 4165 /* add local non-zero vals of this proc's seqmat into ba */ 4166 anzi = ai[arow+1] - ai[arow]; 4167 aj = a->j + ai[arow]; 4168 aa = a->a + ai[arow]; 4169 nextaj = 0; 4170 for (j=0; nextaj<anzi; j++){ 4171 if (*(bj_i 
+ j) == aj[nextaj]){ /* bcol == acol */ 4172 ba_i[j] += aa[nextaj++]; 4173 } 4174 } 4175 4176 /* add received vals into ba */ 4177 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4178 /* i-th row */ 4179 if (i == *nextrow[k]) { 4180 anzi = *(nextai[k]+1) - *nextai[k]; 4181 aj = buf_rj[k] + *(nextai[k]); 4182 aa = abuf_r[k] + *(nextai[k]); 4183 nextaj = 0; 4184 for (j=0; nextaj<anzi; j++){ 4185 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4186 ba_i[j] += aa[nextaj++]; 4187 } 4188 } 4189 nextrow[k]++; nextai[k]++; 4190 } 4191 } 4192 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4193 } 4194 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4195 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4196 4197 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4198 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4199 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4200 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4201 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4202 PetscFunctionReturn(0); 4203 } 4204 4205 #undef __FUNCT__ 4206 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4207 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4208 { 4209 PetscErrorCode ierr; 4210 Mat B_mpi; 4211 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4212 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4213 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4214 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4215 PetscInt len,proc,*dnz,*onz; 4216 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4217 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4218 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4219 MPI_Status *status; 4220 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4221 PetscBT lnkbt; 4222 Mat_Merge_SeqsToMPI *merge; 4223 PetscContainer container; 4224 4225 PetscFunctionBegin; 4226 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4227 4228 /* make sure it is a PETSc comm */ 4229 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4230 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4231 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4232 4233 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4234 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4235 4236 /* determine row ownership */ 4237 /*---------------------------------------------------------*/ 4238 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4239 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4240 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4241 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4242 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4243 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4244 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4245 4246 m = merge->rowmap->n; 4247 M = merge->rowmap->N; 4248 owners = merge->rowmap->range; 4249 4250 /* determine the number of messages to send, their lengths */ 4251 /*---------------------------------------------------------*/ 4252 len_s = merge->len_s; 4253 4254 len = 0; /* length of buf_si[] */ 4255 merge->nsend = 0; 4256 for (proc=0; proc<size; proc++){ 4257 len_si[proc] = 0; 4258 if (proc == rank){ 4259 len_s[proc] = 0; 4260 } else { 4261 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4262 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4263 } 4264 if (len_s[proc]) { 4265 merge->nsend++; 4266 nrows = 0; 4267 for (i=owners[proc]; i<owners[proc+1]; i++){ 4268 if (ai[i+1] > ai[i]) nrows++; 4269 } 4270 len_si[proc] = 2*(nrows+1); 4271 len += len_si[proc]; 4272 } 4273 } 4274 4275 /* determine the number and length of messages to receive for ij-structure */ 4276 /*-------------------------------------------------------------------------*/ 4277 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4278 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4279 4280 /* post the Irecv of j-structure */ 4281 /*-------------------------------*/ 4282 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4283 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4284 4285 /* post the Isend of j-structure */ 4286 /*--------------------------------*/ 4287 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4288 4289 for (proc=0, k=0; proc<size; proc++){ 4290 if (!len_s[proc]) continue; 4291 i = owners[proc]; 4292 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4293 k++; 4294 } 4295 4296 /* receives and sends of j-structure are complete */ 4297 /*------------------------------------------------*/ 4298 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4299 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4300 4301 /* send and recv i-structure */ 4302 /*---------------------------*/ 4303 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4304 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4305 4306 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4307 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4308 for (proc=0,k=0; proc<size; proc++){ 4309 if (!len_s[proc]) continue; 4310 /* form outgoing message for i-structure: 4311 buf_si[0]: nrows to be sent 4312 [1:nrows]: row index (global) 4313 [nrows+1:2*nrows+1]: i-structure index 4314 */ 4315 /*-------------------------------------------*/ 4316 nrows = len_si[proc]/2 - 1; 4317 buf_si_i = buf_si + nrows+1; 4318 buf_si[0] = nrows; 4319 buf_si_i[0] = 0; 4320 nrows = 0; 4321 for (i=owners[proc]; i<owners[proc+1]; i++){ 4322 anzi = ai[i+1] - ai[i]; 4323 if (anzi) { 4324 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4325 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4326 nrows++; 4327 } 4328 } 4329 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4330 k++; 4331 buf_si += len_si[proc]; 4332 } 4333 4334 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4335 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4336 4337 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4338 for (i=0; i<merge->nrecv; i++){ 4339 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4340 } 4341 4342 ierr = PetscFree(len_si);CHKERRQ(ierr); 4343 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4344 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4345 ierr = 
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4346 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4347 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4348 ierr = PetscFree(status);CHKERRQ(ierr); 4349 4350 /* compute a local seq matrix in each processor */ 4351 /*----------------------------------------------*/ 4352 /* allocate bi array and free space for accumulating nonzero column info */ 4353 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4354 bi[0] = 0; 4355 4356 /* create and initialize a linked list */ 4357 nlnk = N+1; 4358 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4359 4360 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4361 len = 0; 4362 len = ai[owners[rank+1]] - ai[owners[rank]]; 4363 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4364 current_space = free_space; 4365 4366 /* determine symbolic info for each local row */ 4367 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4368 4369 for (k=0; k<merge->nrecv; k++){ 4370 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4371 nrows = *buf_ri_k[k]; 4372 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4373 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4374 } 4375 4376 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4377 len = 0; 4378 for (i=0;i<m;i++) { 4379 bnzi = 0; 4380 /* add local non-zero cols of this proc's seqmat into lnk */ 4381 arow = owners[rank] + i; 4382 anzi = ai[arow+1] - ai[arow]; 4383 aj = a->j + ai[arow]; 4384 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4385 bnzi += nlnk; 4386 /* add received col data into lnk */ 4387 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4388 if (i == *nextrow[k]) { /* i-th row */ 4389 anzi = *(nextai[k]+1) - *nextai[k]; 4390 aj = buf_rj[k] + *nextai[k]; 4391 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4392 bnzi += nlnk; 4393 nextrow[k]++; nextai[k]++; 4394 } 4395 } 4396 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4397 4398 /* if free space is not available, make more free space */ 4399 if (current_space->local_remaining<bnzi) { 4400 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4401 nspacedouble++; 4402 } 4403 /* copy data into free space, then initialize lnk */ 4404 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4405 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4406 4407 current_space->array += bnzi; 4408 current_space->local_used += bnzi; 4409 current_space->local_remaining -= bnzi; 4410 4411 bi[i+1] = bi[i] + bnzi; 4412 } 4413 4414 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4415 4416 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4417 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4418 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4419 4420 /* create symbolic parallel matrix B_mpi */ 4421 /*---------------------------------------*/ 4422 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4423 if (n==PETSC_DECIDE) { 4424 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4425 } else { 4426 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4427 } 4428 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4429 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4430 ierr = 
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4431 4432 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4433 B_mpi->assembled = PETSC_FALSE; 4434 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4435 merge->bi = bi; 4436 merge->bj = bj; 4437 merge->buf_ri = buf_ri; 4438 merge->buf_rj = buf_rj; 4439 merge->coi = PETSC_NULL; 4440 merge->coj = PETSC_NULL; 4441 merge->owners_co = PETSC_NULL; 4442 4443 /* attach the supporting struct to B_mpi for reuse */ 4444 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4445 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4446 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4447 *mpimat = B_mpi; 4448 4449 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4450 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4451 PetscFunctionReturn(0); 4452 } 4453 4454 #undef __FUNCT__ 4455 #define __FUNCT__ "MatMerge_SeqsToMPI" 4456 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4457 { 4458 PetscErrorCode ierr; 4459 4460 PetscFunctionBegin; 4461 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4462 if (scall == MAT_INITIAL_MATRIX){ 4463 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4464 } 4465 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4466 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4467 PetscFunctionReturn(0); 4468 } 4469 4470 #undef __FUNCT__ 4471 #define __FUNCT__ "MatGetLocalMat" 4472 /*@ 4473 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4474 4475 Not Collective 4476 4477 Input Parameters: 4478 + A - the matrix 4479 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4480 4481 Output Parameter: 4482 . 
A_loc - the local sequential matrix generated 4483 4484 Level: developer 4485 4486 @*/ 4487 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4488 { 4489 PetscErrorCode ierr; 4490 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4491 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4492 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4493 MatScalar *aa=a->a,*ba=b->a,*cam; 4494 PetscScalar *ca; 4495 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4496 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4497 4498 PetscFunctionBegin; 4499 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4500 if (scall == MAT_INITIAL_MATRIX){ 4501 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4502 ci[0] = 0; 4503 for (i=0; i<am; i++){ 4504 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4505 } 4506 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4507 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4508 k = 0; 4509 for (i=0; i<am; i++) { 4510 ncols_o = bi[i+1] - bi[i]; 4511 ncols_d = ai[i+1] - ai[i]; 4512 /* off-diagonal portion of A */ 4513 for (jo=0; jo<ncols_o; jo++) { 4514 col = cmap[*bj]; 4515 if (col >= cstart) break; 4516 cj[k] = col; bj++; 4517 ca[k++] = *ba++; 4518 } 4519 /* diagonal portion of A */ 4520 for (j=0; j<ncols_d; j++) { 4521 cj[k] = cstart + *aj++; 4522 ca[k++] = *aa++; 4523 } 4524 /* off-diagonal portion of A */ 4525 for (j=jo; j<ncols_o; j++) { 4526 cj[k] = cmap[*bj++]; 4527 ca[k++] = *ba++; 4528 } 4529 } 4530 /* put together the new matrix */ 4531 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4532 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4533 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4534 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4535 mat->free_a = PETSC_TRUE; 4536 mat->free_ij = PETSC_TRUE; 4537 mat->nonew = 0; 4538 } else if (scall == MAT_REUSE_MATRIX){ 4539 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4540 ci = mat->i; cj = mat->j; cam = mat->a; 4541 for (i=0; i<am; i++) { 4542 /* off-diagonal portion of A */ 4543 ncols_o = bi[i+1] - bi[i]; 4544 for (jo=0; jo<ncols_o; jo++) { 4545 col = cmap[*bj]; 4546 if (col >= cstart) break; 4547 *cam++ = *ba++; bj++; 4548 } 4549 /* diagonal portion of A */ 4550 ncols_d = ai[i+1] - ai[i]; 4551 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4552 /* off-diagonal portion of A */ 4553 for (j=jo; j<ncols_o; j++) { 4554 *cam++ = *ba++; bj++; 4555 } 4556 } 4557 } else { 4558 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4559 } 4560 4561 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4562 PetscFunctionReturn(0); 4563 } 4564 4565 #undef __FUNCT__ 4566 #define __FUNCT__ "MatGetLocalMatCondensed" 4567 /*@C 4568 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4569 4570 Not Collective 4571 4572 Input Parameters: 4573 + A - the matrix 4574 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4575 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4576 4577 Output Parameter: 4578 . 
A_loc - the local sequential matrix generated 4579 4580 Level: developer 4581 4582 @*/ 4583 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4584 { 4585 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4586 PetscErrorCode ierr; 4587 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4588 IS isrowa,iscola; 4589 Mat *aloc; 4590 4591 PetscFunctionBegin; 4592 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4593 if (!row){ 4594 start = A->rmap->rstart; end = A->rmap->rend; 4595 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4596 } else { 4597 isrowa = *row; 4598 } 4599 if (!col){ 4600 start = A->cmap->rstart; 4601 cmap = a->garray; 4602 nzA = a->A->cmap->n; 4603 nzB = a->B->cmap->n; 4604 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4605 ncols = 0; 4606 for (i=0; i<nzB; i++) { 4607 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4608 else break; 4609 } 4610 imark = i; 4611 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4612 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4613 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4614 ierr = PetscFree(idx);CHKERRQ(ierr); 4615 } else { 4616 iscola = *col; 4617 } 4618 if (scall != MAT_INITIAL_MATRIX){ 4619 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4620 aloc[0] = *A_loc; 4621 } 4622 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4623 *A_loc = aloc[0]; 4624 ierr = PetscFree(aloc);CHKERRQ(ierr); 4625 if (!row){ 4626 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4627 } 4628 if (!col){ 4629 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4630 } 4631 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4632 PetscFunctionReturn(0); 4633 } 4634 4635 #undef __FUNCT__ 4636 #define __FUNCT__ "MatGetBrowsOfAcols" 4637 /*@C 4638 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 4639 4640 Collective on Mat 4641 4642 Input Parameters: 4643 + A,B - the matrices in mpiaij format 4644 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4645 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4646 4647 Output Parameters: 4648 + rowb, colb - index sets of rows and columns of B to extract 4649 .
brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4650 - B_seq - the sequential matrix generated 4651 4652 Level: developer 4653 4654 @*/ 4655 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4656 { 4657 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4658 PetscErrorCode ierr; 4659 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4660 IS isrowb,iscolb; 4661 Mat *bseq; 4662 4663 PetscFunctionBegin; 4664 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4665 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4666 } 4667 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4668 4669 if (scall == MAT_INITIAL_MATRIX){ 4670 start = A->cmap->rstart; 4671 cmap = a->garray; 4672 nzA = a->A->cmap->n; 4673 nzB = a->B->cmap->n; 4674 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4675 ncols = 0; 4676 for (i=0; i<nzB; i++) { /* row < local row index */ 4677 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4678 else break; 4679 } 4680 imark = i; 4681 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4682 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4683 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4684 ierr = PetscFree(idx);CHKERRQ(ierr); 4685 *brstart = imark; 4686 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4687 } else { 4688 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4689 isrowb = *rowb; iscolb = *colb; 4690 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4691 bseq[0] = *B_seq; 4692 } 4693 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4694 *B_seq = bseq[0]; 4695 ierr = PetscFree(bseq);CHKERRQ(ierr); 4696 if (!rowb){ 4697 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4698 } else { 4699 *rowb = isrowb; 4700 } 4701 if (!colb){ 4702 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4703 } else { 4704 *colb = iscolb; 4705 } 4706 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4707 PetscFunctionReturn(0); 4708 } 4709 4710 #undef __FUNCT__ 4711 #define __FUNCT__ "MatGetBrowsOfAoCols" 4712 /*@C 4713 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 4714 of the OFF-DIAGONAL portion of local A 4715 4716 Collective on Mat 4717 4718 Input Parameters: 4719 + A,B - the matrices in mpiaij format 4720 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4721 . startsj - starting points in B's sending j-array, saved for MAT_REUSE (or PETSC_NULL) 4722 .
startsj_r - similar to startsj for receives 4723 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4724 4725 Output Parameter: 4726 + B_oth - the sequential matrix generated 4727 4728 Level: developer 4729 4730 @*/ 4731 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4732 { 4733 VecScatter_MPI_General *gen_to,*gen_from; 4734 PetscErrorCode ierr; 4735 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4736 Mat_SeqAIJ *b_oth; 4737 VecScatter ctx=a->Mvctx; 4738 MPI_Comm comm=((PetscObject)ctx)->comm; 4739 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4740 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4741 PetscScalar *rvalues,*svalues; 4742 MatScalar *b_otha,*bufa,*bufA; 4743 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4744 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4745 MPI_Status *sstatus,rstatus; 4746 PetscMPIInt jj; 4747 PetscInt *cols,sbs,rbs; 4748 PetscScalar *vals; 4749 4750 PetscFunctionBegin; 4751 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4752 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4753 } 4754 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4755 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4756 4757 gen_to = (VecScatter_MPI_General*)ctx->todata; 4758 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4759 rvalues = gen_from->values; /* holds the length of receiving row */ 4760 svalues = gen_to->values; /* holds the length of sending row */ 4761 nrecvs = gen_from->n; 4762 nsends = gen_to->n; 4763 4764 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4765 srow = gen_to->indices; /* local row index to be sent */ 4766 sstarts = gen_to->starts; 4767 sprocs = gen_to->procs; 4768 sstatus = gen_to->sstatus; 4769 sbs = gen_to->bs; 4770 rstarts = gen_from->starts; 4771 rprocs = gen_from->procs; 4772 rbs = gen_from->bs; 4773 4774 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4775 if (scall == MAT_INITIAL_MATRIX){ 4776 /* i-array */ 4777 /*---------*/ 4778 /* post receives */ 4779 for (i=0; i<nrecvs; i++){ 4780 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4781 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4782 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4783 } 4784 4785 /* pack the outgoing message */ 4786 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4787 sstartsj[0] = 0; rstartsj[0] = 0; 4788 len = 0; /* total length of j or a array to be sent */ 4789 k = 0; 4790 for (i=0; i<nsends; i++){ 4791 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4792 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4793 for (j=0; j<nrows; j++) { 4794 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4795 for (l=0; l<sbs; l++){ 4796 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4797 rowlen[j*sbs+l] = ncols; 4798 len += ncols; 4799 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4800 } 4801 k++; 4802 } 4803 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4804 sstartsj[i+1] = len; /* starting point of (i+1)-th 
outgoing msg in bufj and bufa */ 4805 } 4806 /* recvs and sends of i-array are completed */ 4807 i = nrecvs; 4808 while (i--) { 4809 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4810 } 4811 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4812 4813 /* allocate buffers for sending j and a arrays */ 4814 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4815 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4816 4817 /* create i-array of B_oth */ 4818 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4819 b_othi[0] = 0; 4820 len = 0; /* total length of j or a array to be received */ 4821 k = 0; 4822 for (i=0; i<nrecvs; i++){ 4823 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4824 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4825 for (j=0; j<nrows; j++) { 4826 b_othi[k+1] = b_othi[k] + rowlen[j]; 4827 len += rowlen[j]; k++; 4828 } 4829 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4830 } 4831 4832 /* allocate space for j and a arrays of B_oth */ 4833 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4834 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4835 4836 /* j-array */ 4837 /*---------*/ 4838 /* post receives of j-array */ 4839 for (i=0; i<nrecvs; i++){ 4840 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4841 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4842 } 4843 4844 /* pack the outgoing message j-array */ 4845 k = 0; 4846 for (i=0; i<nsends; i++){ 4847 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4848 bufJ = bufj+sstartsj[i]; 4849 for (j=0; j<nrows; j++) { 4850 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4851 for (ll=0; ll<sbs; ll++){ 4852 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4853 for (l=0; l<ncols; l++){ 4854 *bufJ++ = cols[l]; 4855 } 4856 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4857 } 4858 } 4859 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4860 } 4861 4862 /* recvs and sends of j-array are completed */ 4863 i = nrecvs; 4864 while (i--) { 4865 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4866 } 4867 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4868 } else if (scall == MAT_REUSE_MATRIX){ 4869 sstartsj = *startsj; 4870 rstartsj = *startsj_r; 4871 bufa = *bufa_ptr; 4872 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4873 b_otha = b_oth->a; 4874 } else { 4875 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "MatReuse must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX"); 4876 } 4877 4878 /* a-array */ 4879 /*---------*/ 4880 /* post receives of a-array */ 4881 for (i=0; i<nrecvs; i++){ 4882 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4883 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4884 } 4885 4886 /* pack the outgoing message a-array */ 4887 k = 0; 4888 for (i=0; i<nsends; i++){ 4889 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4890 bufA = bufa+sstartsj[i]; 4891 for (j=0; j<nrows; j++) { 4892 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4893 for (ll=0; ll<sbs; ll++){ 4894 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4895 for (l=0; l<ncols; l++){ 4896 *bufA++ = vals[l]; 4897 } 4898 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4899 } 4900 } 4901 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4902 } 4903 /* recvs and sends of a-array are completed */ 4904 i = nrecvs; 4905 while (i--) { 4906 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4907 } 4908 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4909 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4910 4911 if (scall == MAT_INITIAL_MATRIX){ 4912 /* put together the new matrix */ 4913 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4914 4915 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4916 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4917 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4918 b_oth->free_a = PETSC_TRUE; 4919 b_oth->free_ij = PETSC_TRUE; 4920 b_oth->nonew = 0; 4921 4922 ierr = PetscFree(bufj);CHKERRQ(ierr); 4923 if (!startsj || !bufa_ptr){ 4924 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4925 ierr = PetscFree(bufa);CHKERRQ(ierr); /* send buffer is not returned to the caller, so free it here */ 4926 } else { 4927 *startsj = sstartsj; 4928 *startsj_r = rstartsj; 4929 *bufa_ptr = bufa; 4930 } 4931 } 4932 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4933 PetscFunctionReturn(0); 4934 } 4935 4936 #undef __FUNCT__ 4937 #define __FUNCT__ "MatGetCommunicationStructs" 4938 /*@C 4939 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4940 4941 Not Collective 4942 4943 Input Parameter: 4944 . A - The matrix in mpiaij format 4945 4946 Output Parameters: 4947 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4948 .
colmap - A map from global column index to local index into lvec 4949 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4950 4951 Level: developer 4952 4953 @*/ 4954 #if defined (PETSC_USE_CTABLE) 4955 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4956 #else 4957 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4958 #endif 4959 { 4960 Mat_MPIAIJ *a; 4961 4962 PetscFunctionBegin; 4963 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4964 PetscValidPointer(lvec, 2); 4965 PetscValidPointer(colmap, 3); 4966 PetscValidPointer(multScatter, 4); 4967 a = (Mat_MPIAIJ *) A->data; 4968 if (lvec) *lvec = a->lvec; 4969 if (colmap) *colmap = a->colmap; 4970 if (multScatter) *multScatter = a->Mvctx; 4971 PetscFunctionReturn(0); 4972 } 4973 4974 EXTERN_C_BEGIN 4975 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 4976 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 4977 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 4978 EXTERN_C_END 4979 4980 #undef __FUNCT__ 4981 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4982 /* 4983 Computes (B'*A')' since computing B*A directly is untenable 4984 4985 n p p 4986 ( ) ( ) ( ) 4987 m ( A ) * n ( B ) = m ( C ) 4988 ( ) ( ) ( ) 4989 4990 */ 4991 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4992 { 4993 PetscErrorCode ierr; 4994 Mat At,Bt,Ct; 4995 4996 PetscFunctionBegin; 4997 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4998 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4999 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5000 ierr = MatDestroy(At);CHKERRQ(ierr); 5001 ierr = MatDestroy(Bt);CHKERRQ(ierr); 5002 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5003 ierr = MatDestroy(Ct);CHKERRQ(ierr); 5004 PetscFunctionReturn(0); 5005 } 5006 5007 #undef __FUNCT__ 5008 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5009 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5010 { 5011 PetscErrorCode ierr; 5012 PetscInt m=A->rmap->n,n=B->cmap->n; 5013 Mat Cmat; 5014 5015 PetscFunctionBegin; 5016 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5017 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5018 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5019 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5020 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5021 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5022 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5023 *C = Cmat; 5024 PetscFunctionReturn(0); 5025 } 5026 5027 /* ----------------------------------------------------------------*/ 5028 #undef __FUNCT__ 5029 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5030 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5031 { 5032 PetscErrorCode ierr; 5033 5034 PetscFunctionBegin; 5035 if (scall == MAT_INITIAL_MATRIX){ 5036 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5037 } 5038 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5039 PetscFunctionReturn(0); 5040 } 
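/*
   Example (an illustrative, untested sketch added here for documentation; it is not part of the build).
   The three MatMatMult*_MPIDense_MPIAIJ routines above are composed on the MPIAIJ matrix in
   MatCreate_MPIAIJ() below, so a user would normally reach them through the public MatMatMult()
   interface with a MATMPIDENSE A and a MATMPIAIJ B.  The local sizes mloc and nloc and the
   preallocation numbers are assumptions made only for this example.

     Mat            A,B,C;
     PetscInt       mloc = 4,nloc = 4;
     PetscErrorCode ierr;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,mloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&B);CHKERRQ(ierr);
     ... fill A and B with MatSetValues() and assemble them ...
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(A);CHKERRQ(ierr);
     ierr = MatDestroy(B);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/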
5041 5042 EXTERN_C_BEGIN 5043 #if defined(PETSC_HAVE_MUMPS) 5044 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5045 #endif 5046 #if defined(PETSC_HAVE_PASTIX) 5047 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5048 #endif 5049 #if defined(PETSC_HAVE_SUPERLU_DIST) 5050 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5051 #endif 5052 #if defined(PETSC_HAVE_SPOOLES) 5053 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5054 #endif 5055 EXTERN_C_END 5056 5057 /*MC 5058 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5059 5060 Options Database Keys: 5061 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5062 5063 Level: beginner 5064 5065 .seealso: MatCreateMPIAIJ() 5066 M*/ 5067 5068 EXTERN_C_BEGIN 5069 #undef __FUNCT__ 5070 #define __FUNCT__ "MatCreate_MPIAIJ" 5071 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5072 { 5073 Mat_MPIAIJ *b; 5074 PetscErrorCode ierr; 5075 PetscMPIInt size; 5076 5077 PetscFunctionBegin; 5078 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5079 5080 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5081 B->data = (void*)b; 5082 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5083 B->rmap->bs = 1; 5084 B->assembled = PETSC_FALSE; 5085 B->mapping = 0; 5086 5087 B->insertmode = NOT_SET_VALUES; 5088 b->size = size; 5089 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5090 5091 /* build cache for off array entries formed */ 5092 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5093 b->donotstash = PETSC_FALSE; 5094 b->colmap = 0; 5095 b->garray = 0; 5096 b->roworiented = PETSC_TRUE; 5097 5098 /* stuff used for matrix vector multiply */ 5099 b->lvec = PETSC_NULL; 5100 b->Mvctx = PETSC_NULL; 5101 5102 /* stuff for MatGetRow() */ 5103 b->rowindices = 0; 5104 b->rowvalues = 0; 5105 b->getrowactive = PETSC_FALSE; 5106 5107 #if defined(PETSC_HAVE_SPOOLES) 5108 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5109 "MatGetFactor_mpiaij_spooles", 5110 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5111 #endif 5112 #if defined(PETSC_HAVE_MUMPS) 5113 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5114 "MatGetFactor_aij_mumps", 5115 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5116 #endif 5117 #if defined(PETSC_HAVE_PASTIX) 5118 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5119 "MatGetFactor_mpiaij_pastix", 5120 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5121 #endif 5122 #if defined(PETSC_HAVE_SUPERLU_DIST) 5123 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5124 "MatGetFactor_mpiaij_superlu_dist", 5125 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5126 #endif 5127 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5128 "MatStoreValues_MPIAIJ", 5129 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5130 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5131 "MatRetrieveValues_MPIAIJ", 5132 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5133 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5134 "MatGetDiagonalBlock_MPIAIJ", 5135 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5136 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5137 "MatIsTranspose_MPIAIJ", 
5138 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5139 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5140 "MatMPIAIJSetPreallocation_MPIAIJ", 5141 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5142 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5143 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5144 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5145 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5146 "MatDiagonalScaleLocal_MPIAIJ", 5147 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5148 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5149 "MatConvert_MPIAIJ_MPIAIJPERM", 5150 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5151 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5152 "MatConvert_MPIAIJ_MPIAIJCRL", 5153 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5154 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5155 "MatConvert_MPIAIJ_MPISBAIJ", 5156 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5157 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5158 "MatMatMult_MPIDense_MPIAIJ", 5159 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5160 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5161 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5162 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5163 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5164 "MatMatMultNumeric_MPIDense_MPIAIJ", 5165 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5166 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5167 PetscFunctionReturn(0); 5168 } 5169 EXTERN_C_END 5170 5171 #undef __FUNCT__ 5172 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5173 /*@ 5174 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" 5175 and "off-diagonal" part of the matrix in CSR format. 5176 5177 Collective on MPI_Comm 5178 5179 Input Parameters: 5180 + comm - MPI communicator 5181 . m - number of local rows (Cannot be PETSC_DECIDE) 5182 . n - This value should be the same as the local size used in creating the 5183 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it 5184 calculated if N is given) For square matrices n is almost always m. 5185 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5186 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5187 . i - row indices for "diagonal" portion of matrix 5188 . j - column indices 5189 . a - matrix values 5190 . oi - row indices for "off-diagonal" portion of matrix 5191 . oj - column indices 5192 - oa - matrix values 5193 5194 Output Parameter: 5195 . mat - the matrix 5196 5197 Level: advanced 5198 5199 Notes: 5200 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5201 5202 The i and j indices are 0 based 5203 5204 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5205 5206 This sets local rows and cannot be used to set off-processor values. 5207 5208 You cannot later use MatSetValues() to change values in this matrix.
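   Example usage (an illustrative sketch added for documentation; the 4x4 global matrix, run on two
   processes with two rows and two columns per process, and all of the array values below are
   assumptions made only for this example).  Judging from the two MatCreateSeqAIJWithArrays() calls
   in the routine body, j holds local column indices of the "diagonal" block while oj holds global
   column indices.  On rank 0 one might pass

     PetscInt    i[3]  = {0,1,2},  j[2]  = {0,1};      diagonal block: one entry per row
     PetscScalar a[2]  = {1.0,2.0};
     PetscInt    oi[3] = {0,1,2},  oj[2] = {2,3};      off-diagonal block: one entry per row
     PetscScalar oa[2] = {-1.0,-1.0};
     Mat         A;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   with rank 1 passing the analogous arrays for its own rows (its oj entries lying in columns 0 and 1).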
5209 5210 .keywords: matrix, aij, compressed row, sparse, parallel 5211 5212 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5213 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5214 @*/ 5215 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5216 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5217 { 5218 PetscErrorCode ierr; 5219 Mat_MPIAIJ *maij; 5220 5221 PetscFunctionBegin; 5222 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5223 if (i[0]) { 5224 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5225 } 5226 if (oi[0]) { 5227 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5228 } 5229 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5230 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5231 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5232 maij = (Mat_MPIAIJ*) (*mat)->data; 5233 maij->donotstash = PETSC_TRUE; 5234 (*mat)->preallocated = PETSC_TRUE; 5235 5236 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5237 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5238 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5239 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5240 5241 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5242 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5243 5244 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5245 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5246 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5247 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5248 5249 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5250 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5251 PetscFunctionReturn(0); 5252 } 5253 5254 /* 5255 Special version for direct calls from Fortran 5256 */ 5257 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5258 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5259 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5260 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5261 #endif 5262 5263 /* Change these macros so they can be used in a void function */ 5264 #undef CHKERRQ 5265 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5266 #undef SETERRQ2 5267 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5268 #undef SETERRQ 5269 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5270 5271 EXTERN_C_BEGIN 5272 #undef __FUNCT__ 5273 #define __FUNCT__ "matsetvaluesmpiaij_" 5274 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5275 { 5276 Mat mat = *mmat; 5277 PetscInt m = *mm, n = *mn; 5278 InsertMode addv = *maddv; 5279 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5280 PetscScalar value; 5281 PetscErrorCode ierr; 5282 5283 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5284 if (mat->insertmode == NOT_SET_VALUES) { 5285 mat->insertmode = addv; 5286 } 5287 #if defined(PETSC_USE_DEBUG) 5288 else if (mat->insertmode != addv) { 5289 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5290 } 5291 #endif 5292 { 5293 PetscInt i,j,rstart =
mat->rmap->rstart,rend = mat->rmap->rend; 5294 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5295 PetscTruth roworiented = aij->roworiented; 5296 5297 /* Some Variables required in the macro */ 5298 Mat A = aij->A; 5299 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5300 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5301 MatScalar *aa = a->a; 5302 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5303 Mat B = aij->B; 5304 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5305 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5306 MatScalar *ba = b->a; 5307 5308 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5309 PetscInt nonew = a->nonew; 5310 MatScalar *ap1,*ap2; 5311 5312 PetscFunctionBegin; 5313 for (i=0; i<m; i++) { 5314 if (im[i] < 0) continue; 5315 #if defined(PETSC_USE_DEBUG) 5316 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5317 #endif 5318 if (im[i] >= rstart && im[i] < rend) { 5319 row = im[i] - rstart; 5320 lastcol1 = -1; 5321 rp1 = aj + ai[row]; 5322 ap1 = aa + ai[row]; 5323 rmax1 = aimax[row]; 5324 nrow1 = ailen[row]; 5325 low1 = 0; 5326 high1 = nrow1; 5327 lastcol2 = -1; 5328 rp2 = bj + bi[row]; 5329 ap2 = ba + bi[row]; 5330 rmax2 = bimax[row]; 5331 nrow2 = bilen[row]; 5332 low2 = 0; 5333 high2 = nrow2; 5334 5335 for (j=0; j<n; j++) { 5336 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5337 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5338 if (in[j] >= cstart && in[j] < cend){ 5339 col = in[j] - cstart; 5340 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5341 } else if (in[j] < 0) continue; 5342 #if defined(PETSC_USE_DEBUG) 5343 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5344 #endif 5345 else { 5346 if (mat->was_assembled) { 5347 if (!aij->colmap) { 5348 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5349 } 5350 #if defined (PETSC_USE_CTABLE) 5351 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5352 col--; 5353 #else 5354 col = aij->colmap[in[j]] - 1; 5355 #endif 5356 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5357 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5358 col = in[j]; 5359 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5360 B = aij->B; 5361 b = (Mat_SeqAIJ*)B->data; 5362 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5363 rp2 = bj + bi[row]; 5364 ap2 = ba + bi[row]; 5365 rmax2 = bimax[row]; 5366 nrow2 = bilen[row]; 5367 low2 = 0; 5368 high2 = nrow2; 5369 bm = aij->B->rmap->n; 5370 ba = b->a; 5371 } 5372 } else col = in[j]; 5373 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5374 } 5375 } 5376 } else { 5377 if (!aij->donotstash) { 5378 if (roworiented) { 5379 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5380 } else { 5381 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5382 } 5383 } 5384 } 5385 }} 5386 PetscFunctionReturnVoid(); 5387 } 5388 EXTERN_C_END 5389 5390
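/*
   Illustrative usage sketch for the developer-level extraction routines defined in this file
   (added here for documentation; untested and not part of the build).  Given an assembled
   MATMPIAIJ matrix A, created elsewhere, each process can pull out its local rows as a
   sequential AIJ matrix and later refresh only the numerical values:

     Mat            A;      assembled MATMPIAIJ matrix, assumed to exist
     Mat            Aloc = PETSC_NULL;
     PetscErrorCode ierr;

     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     ... change the entries of A and reassemble it ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);

   MatGetLocalMatCondensed() is called in the same way, with optional row/column index sets
   (or PETSC_NULL) to restrict the extraction to the nonzero columns.
*/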