#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

  Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr =
PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap->n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap->n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap->n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global 
column number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable, at a
  slightly higher hash-table lookup cost; without it, it is not scalable (each
  process stores an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode
MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 276 { 277 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 278 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 279 PetscErrorCode ierr; 280 PetscInt l,*garray = mat->garray,diag; 281 282 PetscFunctionBegin; 283 /* code only works for square matrices A */ 284 285 /* find size of row to the left of the diagonal part */ 286 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 287 row = row - diag; 288 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 289 if (garray[b->j[b->i[row]+l]] > diag) break; 290 } 291 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 292 293 /* diagonal part */ 294 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 295 296 /* right of diagonal part */ 297 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 298 PetscFunctionReturn(0); 299 } 300 301 #undef __FUNCT__ 302 #define __FUNCT__ "MatSetValues_MPIAIJ" 303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 306 PetscScalar value; 307 PetscErrorCode ierr; 308 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 309 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 310 PetscTruth roworiented = aij->roworiented; 311 312 /* Some Variables required in the macro */ 313 Mat A = aij->A; 314 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 315 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 316 MatScalar *aa = a->a; 317 PetscTruth ignorezeroentries = a->ignorezeroentries; 318 Mat B = aij->B; 319 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 320 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 321 MatScalar *ba = b->a; 322 323 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 324 PetscInt nonew = a->nonew; 325 MatScalar *ap1,*ap2; 326 327 PetscFunctionBegin; 328 if (v) PetscValidScalarPointer(v,6); 329 for (i=0; i<m; i++) { 330 if (im[i] < 0) continue; 331 #if defined(PETSC_USE_DEBUG) 332 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 333 #endif 334 if (im[i] >= rstart && im[i] < rend) { 335 row = im[i] - rstart; 336 lastcol1 = -1; 337 rp1 = aj + ai[row]; 338 ap1 = aa + ai[row]; 339 rmax1 = aimax[row]; 340 nrow1 = ailen[row]; 341 low1 = 0; 342 high1 = nrow1; 343 lastcol2 = -1; 344 rp2 = bj + bi[row]; 345 ap2 = ba + bi[row]; 346 rmax2 = bimax[row]; 347 nrow2 = bilen[row]; 348 low2 = 0; 349 high2 = nrow2; 350 351 for (j=0; j<n; j++) { 352 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 353 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 354 if (in[j] >= cstart && in[j] < cend){ 355 col = in[j] - cstart; 356 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 357 } else if (in[j] < 0) continue; 358 #if defined(PETSC_USE_DEBUG) 359 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 360 #endif 361 else { 362 if (mat->was_assembled) { 363 if (!aij->colmap) { 364 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 365 } 366 #if defined (PETSC_USE_CTABLE) 367 ierr = 
PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 368 col--; 369 #else 370 col = aij->colmap[in[j]] - 1; 371 #endif 372 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 373 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 374 col = in[j]; 375 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 376 B = aij->B; 377 b = (Mat_SeqAIJ*)B->data; 378 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 379 rp2 = bj + bi[row]; 380 ap2 = ba + bi[row]; 381 rmax2 = bimax[row]; 382 nrow2 = bilen[row]; 383 low2 = 0; 384 high2 = nrow2; 385 bm = aij->B->rmap->n; 386 ba = b->a; 387 } 388 } else col = in[j]; 389 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 390 } 391 } 392 } else { 393 if (!aij->donotstash) { 394 if (roworiented) { 395 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 396 } else { 397 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 398 } 399 } 400 } 401 } 402 PetscFunctionReturn(0); 403 } 404 405 #undef __FUNCT__ 406 #define __FUNCT__ "MatGetValues_MPIAIJ" 407 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 408 { 409 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 410 PetscErrorCode ierr; 411 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 412 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 413 414 PetscFunctionBegin; 415 for (i=0; i<m; i++) { 416 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 417 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 418 if (idxm[i] >= rstart && idxm[i] < rend) { 419 row = idxm[i] - rstart; 420 for (j=0; j<n; j++) { 421 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 422 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 423 if (idxn[j] >= cstart && idxn[j] < cend){ 424 col = idxn[j] - cstart; 425 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 426 } else { 427 if (!aij->colmap) { 428 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 429 } 430 #if defined (PETSC_USE_CTABLE) 431 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 432 col --; 433 #else 434 col = aij->colmap[idxn[j]] - 1; 435 #endif 436 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 437 else { 438 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 439 } 440 } 441 } 442 } else { 443 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 444 } 445 } 446 PetscFunctionReturn(0); 447 } 448 449 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 453 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 454 { 455 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 456 PetscErrorCode ierr; 457 PetscInt nstash,reallocs; 458 InsertMode addv; 459 460 PetscFunctionBegin; 461 if (aij->donotstash) { 462 PetscFunctionReturn(0); 463 } 464 465 /* make sure all processors are either in INSERTMODE or ADDMODE */ 466 ierr = 
MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves, in order that we may reassemble.
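     A process "disassembles" when MatSetValues() is handed an entry for an
     off-process column that is not present in its off-diagonal block B;
     DisAssemble_MPIAIJ() then returns B to global column numbering so the new
     entry can be inserted (see MatSetValues_MPIAIJ() above). The MPI_PROD
     reduction of the was_assembled flags below becomes zero once any process
     has disassembled, in which case the still-assembled processes disassemble
     as well, so that all of them rebuild the off-process scatter together.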
*/ 515 /* 516 if nonzero structure of submatrix B cannot change then we know that 517 no processor disassembled thus we can skip this stuff 518 */ 519 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 520 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 521 if (mat->was_assembled && !other_disassembled) { 522 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 523 } 524 } 525 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 526 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 527 } 528 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 529 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 530 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 531 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 532 533 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 534 aij->rowvalues = 0; 535 536 /* used by MatAXPY() */ 537 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 538 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 539 540 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 541 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 542 PetscFunctionReturn(0); 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 547 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 548 { 549 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 550 PetscErrorCode ierr; 551 552 PetscFunctionBegin; 553 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 554 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 555 PetscFunctionReturn(0); 556 } 557 558 #undef __FUNCT__ 559 #define __FUNCT__ "MatZeroRows_MPIAIJ" 560 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 561 { 562 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 563 PetscErrorCode ierr; 564 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 565 PetscInt i,*owners = A->rmap->range; 566 PetscInt *nprocs,j,idx,nsends,row; 567 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 568 PetscInt *rvalues,count,base,slen,*source; 569 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 570 MPI_Comm comm = ((PetscObject)A)->comm; 571 MPI_Request *send_waits,*recv_waits; 572 MPI_Status recv_status,*send_status; 573 #if defined(PETSC_DEBUG) 574 PetscTruth found = PETSC_FALSE; 575 #endif 576 577 PetscFunctionBegin; 578 /* first count number of contributors to each processor */ 579 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 580 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 581 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 582 j = 0; 583 for (i=0; i<N; i++) { 584 if (lastidx > (idx = rows[i])) j = 0; 585 lastidx = idx; 586 for (; j<size; j++) { 587 if (idx >= owners[j] && idx < owners[j+1]) { 588 nprocs[2*j]++; 589 nprocs[2*j+1] = 1; 590 owner[i] = j; 591 #if defined(PETSC_DEBUG) 592 found = PETSC_TRUE; 593 #endif 594 break; 595 } 596 } 597 #if defined(PETSC_DEBUG) 598 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 599 found = PETSC_FALSE; 600 #endif 601 } 602 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 603 604 /* inform other processors of number of messages and max length*/ 605 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 606 607 /* post receives: */ 608 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 609 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 610 for (i=0; i<nrecvs; i++) { 611 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 612 } 613 614 /* do sends: 615 1) starts[i] gives the starting index in svalues for stuff going to 616 the ith processor 617 */ 618 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 619 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 620 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 621 starts[0] = 0; 622 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 623 for (i=0; i<N; i++) { 624 svalues[starts[owner[i]]++] = rows[i]; 625 } 626 627 starts[0] = 0; 628 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 629 count = 0; 630 for (i=0; i<size; i++) { 631 if (nprocs[2*i+1]) { 632 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 633 } 634 } 635 ierr = PetscFree(starts);CHKERRQ(ierr); 636 637 base = owners[rank]; 638 639 /* wait on receives */ 640 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 641 count = nrecvs; slen = 0; 642 while (count) { 643 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 644 /* unpack receives into our local space */ 645 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 646 source[imdex] = recv_status.MPI_SOURCE; 647 lens[imdex] = n; 648 slen += n; 649 count--; 650 } 651 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 652 653 /* move the data into the send scatter */ 654 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 655 count = 0; 656 for (i=0; i<nrecvs; i++) { 657 values = rvalues + i*nmax; 658 for (j=0; j<lens[i]; j++) { 659 lrows[count++] = values[j] - base; 660 } 661 } 662 ierr = PetscFree(rvalues);CHKERRQ(ierr); 663 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 664 ierr = PetscFree(owner);CHKERRQ(ierr); 665 ierr = PetscFree(nprocs);CHKERRQ(ierr); 666 667 /* actually zap the local rows */ 668 /* 669 Zero the required rows. If the "diagonal block" of the matrix 670 is square and the user wishes to set the diagonal we use separate 671 code so that MatSetValues() is not called for each diagonal allocating 672 new memory, thus calling lots of mallocs and slowing things down. 
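     As a caller-side illustration of the interface implemented here (the
     matrix handle A, the row list, and the diagonal value below are made-up
     examples, not taken from this file):

         PetscInt    rows[2] = {0, 7};   // global row numbers; rows owned by another process are forwarded to their owner
         PetscScalar diag    = 1.0;      // placed on the diagonal of each zeroed row; 0.0 leaves the rows entirely zero
         ierr = MatZeroRows(A,2,rows,diag);CHKERRQ(ierr);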
673 674 */ 675 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 676 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 677 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 678 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 679 } else if (diag != 0.0) { 680 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 681 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 682 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 683 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 684 } 685 for (i = 0; i < slen; i++) { 686 row = lrows[i] + rstart; 687 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 688 } 689 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 690 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 691 } else { 692 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 693 } 694 ierr = PetscFree(lrows);CHKERRQ(ierr); 695 696 /* wait on sends */ 697 if (nsends) { 698 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 699 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 700 ierr = PetscFree(send_status);CHKERRQ(ierr); 701 } 702 ierr = PetscFree(send_waits);CHKERRQ(ierr); 703 ierr = PetscFree(svalues);CHKERRQ(ierr); 704 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatMult_MPIAIJ" 710 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 711 { 712 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 713 PetscErrorCode ierr; 714 PetscInt nt; 715 716 PetscFunctionBegin; 717 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 718 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 719 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 720 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 721 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 722 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 723 PetscFunctionReturn(0); 724 } 725 726 #undef __FUNCT__ 727 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 728 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 729 { 730 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 731 PetscErrorCode ierr; 732 733 PetscFunctionBegin; 734 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 735 PetscFunctionReturn(0); 736 } 737 738 #undef __FUNCT__ 739 #define __FUNCT__ "MatMultAdd_MPIAIJ" 740 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 741 { 742 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 743 PetscErrorCode ierr; 744 745 PetscFunctionBegin; 746 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 747 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 748 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 749 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 750 PetscFunctionReturn(0); 751 } 752 753 #undef __FUNCT__ 754 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 755 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 756 { 757 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 758 PetscErrorCode ierr; 759 PetscTruth merged; 760 761 PetscFunctionBegin; 762 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 763 /* do nondiagonal part */ 764 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 765 if (!merged) { 766 /* send it on 
its way */ 767 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 768 /* do local part */ 769 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 770 /* receive remote parts: note this assumes the values are not actually */ 771 /* added in yy until the next line, */ 772 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 } else { 774 /* do local part */ 775 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 776 /* send it on its way */ 777 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 778 /* values actually were received in the Begin() but we need to call this nop */ 779 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 780 } 781 PetscFunctionReturn(0); 782 } 783 784 EXTERN_C_BEGIN 785 #undef __FUNCT__ 786 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 787 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 788 { 789 MPI_Comm comm; 790 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 791 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 792 IS Me,Notme; 793 PetscErrorCode ierr; 794 PetscInt M,N,first,last,*notme,i; 795 PetscMPIInt size; 796 797 PetscFunctionBegin; 798 799 /* Easy test: symmetric diagonal block */ 800 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 801 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 802 if (!*f) PetscFunctionReturn(0); 803 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 804 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 805 if (size == 1) PetscFunctionReturn(0); 806 807 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 808 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 809 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 810 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 811 for (i=0; i<first; i++) notme[i] = i; 812 for (i=last; i<M; i++) notme[i-last+first] = i; 813 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 814 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 815 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 816 Aoff = Aoffs[0]; 817 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 818 Boff = Boffs[0]; 819 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 820 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 821 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 822 ierr = ISDestroy(Me);CHKERRQ(ierr); 823 ierr = ISDestroy(Notme);CHKERRQ(ierr); 824 825 PetscFunctionReturn(0); 826 } 827 EXTERN_C_END 828 829 #undef __FUNCT__ 830 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 831 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 832 { 833 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 834 PetscErrorCode ierr; 835 836 PetscFunctionBegin; 837 /* do nondiagonal part */ 838 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 839 /* send it on its way */ 840 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 841 /* do local part */ 842 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 843 /* receive remote parts */ 844 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 848 /* 849 This only works correctly for square matrices where the subblock A->A is the 850 diagonal block 851 */ 852 #undef 
__FUNCT__ 853 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 854 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 855 { 856 PetscErrorCode ierr; 857 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 858 859 PetscFunctionBegin; 860 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 861 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 862 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatScale_MPIAIJ" 868 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 869 { 870 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 871 PetscErrorCode ierr; 872 873 PetscFunctionBegin; 874 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 875 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 876 PetscFunctionReturn(0); 877 } 878 879 #undef __FUNCT__ 880 #define __FUNCT__ "MatDestroy_MPIAIJ" 881 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 882 { 883 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 884 PetscErrorCode ierr; 885 886 PetscFunctionBegin; 887 #if defined(PETSC_USE_LOG) 888 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 889 #endif 890 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 891 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 892 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 893 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 894 #if defined (PETSC_USE_CTABLE) 895 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 896 #else 897 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 898 #endif 899 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 900 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 901 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 902 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 903 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 904 ierr = PetscFree(aij);CHKERRQ(ierr); 905 906 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 907 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 908 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 909 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 910 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 911 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 912 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 913 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 914 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 915 PetscFunctionReturn(0); 916 } 917 918 #undef __FUNCT__ 919 #define __FUNCT__ "MatView_MPIAIJ_Binary" 920 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 921 { 922 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 923 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 924 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 925 PetscErrorCode ierr; 926 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 927 int fd; 928 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 929 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = 
mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest amount any processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
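  /*
     Summary of the binary layout produced here, as assembled on process 0 by
     the writes above and below (ranks gathered in order):

         PetscInt    header[4]                       MAT_FILE_CLASSID, M, N, global number of nonzeros
         PetscInt    row_lengths[M]                  nonzeros in each row
         PetscInt    column_indices[global nonzeros] global columns, increasing within each row
         PetscScalar values[global nonzeros]         numerical values, filled by the loop below

     which, as far as these writes go, is the layout MatLoad() expects for AIJ
     matrices.
  */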
cnt = 0; 1012 for (i=0; i<mat->rmap->n; i++) { 1013 for (j=B->i[i]; j<B->i[i+1]; j++) { 1014 if ( garray[B->j[j]] > cstart) break; 1015 column_values[cnt++] = B->a[j]; 1016 } 1017 for (k=A->i[i]; k<A->i[i+1]; k++) { 1018 column_values[cnt++] = A->a[k]; 1019 } 1020 for (; j<B->i[i+1]; j++) { 1021 column_values[cnt++] = B->a[j]; 1022 } 1023 } 1024 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1025 1026 /* store the column values to the file */ 1027 if (!rank) { 1028 MPI_Status status; 1029 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1030 for (i=1; i<size; i++) { 1031 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1032 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1033 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1034 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1035 } 1036 } else { 1037 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1038 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1039 } 1040 ierr = PetscFree(column_values);CHKERRQ(ierr); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1046 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1047 { 1048 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1049 PetscErrorCode ierr; 1050 PetscMPIInt rank = aij->rank,size = aij->size; 1051 PetscTruth isdraw,iascii,isbinary; 1052 PetscViewer sviewer; 1053 PetscViewerFormat format; 1054 1055 PetscFunctionBegin; 1056 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1057 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1058 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1059 if (iascii) { 1060 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1061 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1062 MatInfo info; 1063 PetscTruth inodes; 1064 1065 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1066 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1067 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1068 if (!inodes) { 1069 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1070 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1071 } else { 1072 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1073 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1074 } 1075 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1076 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1077 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1079 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1080 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in 
matrix-vector product: \n");CHKERRQ(ierr); 1081 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1082 PetscFunctionReturn(0); 1083 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1084 PetscInt inodecount,inodelimit,*inodes; 1085 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1086 if (inodes) { 1087 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1088 } else { 1089 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1090 } 1091 PetscFunctionReturn(0); 1092 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1093 PetscFunctionReturn(0); 1094 } 1095 } else if (isbinary) { 1096 if (size == 1) { 1097 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1098 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1099 } else { 1100 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1101 } 1102 PetscFunctionReturn(0); 1103 } else if (isdraw) { 1104 PetscDraw draw; 1105 PetscTruth isnull; 1106 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1107 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1108 } 1109 1110 if (size == 1) { 1111 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1112 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1113 } else { 1114 /* assemble the entire matrix onto first processor. */ 1115 Mat A; 1116 Mat_SeqAIJ *Aloc; 1117 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1118 MatScalar *a; 1119 1120 if (mat->rmap->N > 1024) { 1121 PetscTruth flg = PETSC_FALSE; 1122 1123 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1124 if (!flg) { 1125 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1126 } 1127 } 1128 1129 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1130 if (!rank) { 1131 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1132 } else { 1133 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1134 } 1135 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1136 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1137 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1138 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1139 1140 /* copy over the A part */ 1141 Aloc = (Mat_SeqAIJ*)aij->A->data; 1142 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1143 row = mat->rmap->rstart; 1144 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1145 for (i=0; i<m; i++) { 1146 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1147 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1148 } 1149 aj = Aloc->j; 1150 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1151 1152 /* copy over the B part */ 1153 Aloc = (Mat_SeqAIJ*)aij->B->data; 1154 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1155 row = mat->rmap->rstart; 1156 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1157 ct = cols; 1158 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1159 for (i=0; i<m; i++) { 1160 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1161 row++; a += ai[i+1]-ai[i]; 
cols += ai[i+1]-ai[i]; 1162 } 1163 ierr = PetscFree(ct);CHKERRQ(ierr); 1164 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1165 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1166 /* 1167 Everyone has to call to draw the matrix since the graphics waits are 1168 synchronized across all processors that share the PetscDraw object 1169 */ 1170 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1171 if (!rank) { 1172 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1173 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1174 } 1175 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1176 ierr = MatDestroy(A);CHKERRQ(ierr); 1177 } 1178 PetscFunctionReturn(0); 1179 } 1180 1181 #undef __FUNCT__ 1182 #define __FUNCT__ "MatView_MPIAIJ" 1183 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1184 { 1185 PetscErrorCode ierr; 1186 PetscTruth iascii,isdraw,issocket,isbinary; 1187 1188 PetscFunctionBegin; 1189 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1190 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1191 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1192 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1193 if (iascii || isdraw || isbinary || issocket) { 1194 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1195 } else { 1196 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1197 } 1198 PetscFunctionReturn(0); 1199 } 1200 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatSOR_MPIAIJ" 1203 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1204 { 1205 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1206 PetscErrorCode ierr; 1207 Vec bb1 = 0; 1208 PetscTruth hasop; 1209 1210 PetscFunctionBegin; 1211 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1212 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1213 } 1214 1215 if (flag == SOR_APPLY_UPPER) { 1216 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 1220 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1221 if (flag & SOR_ZERO_INITIAL_GUESS) { 1222 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1223 its--; 1224 } 1225 1226 while (its--) { 1227 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1228 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 1230 /* update rhs: bb1 = bb - B*x */ 1231 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1232 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1233 1234 /* local sweep */ 1235 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1236 } 1237 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1238 if (flag & SOR_ZERO_INITIAL_GUESS) { 1239 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1240 its--; 1241 } 1242 while (its--) { 1243 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1244 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 1246 /* update 
rhs: bb1 = bb - B*x */ 1247 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1248 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1249 1250 /* local sweep */ 1251 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1252 } 1253 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1254 if (flag & SOR_ZERO_INITIAL_GUESS) { 1255 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1256 its--; 1257 } 1258 while (its--) { 1259 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1260 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1261 1262 /* update rhs: bb1 = bb - B*x */ 1263 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1264 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1265 1266 /* local sweep */ 1267 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1268 } 1269 } else if (flag & SOR_EISENSTAT) { 1270 Vec xx1; 1271 1272 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1273 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1274 1275 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1276 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1277 if (!mat->diag) { 1278 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1279 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1280 } 1281 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1282 if (hasop) { 1283 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1284 } else { 1285 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1286 } 1287 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1288 1289 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1290 1291 /* local sweep */ 1292 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1293 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1294 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1295 } else { 1296 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1297 } 1298 1299 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatPermute_MPIAIJ" 1305 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1306 { 1307 MPI_Comm comm,pcomm; 1308 PetscInt first,local_size,nrows; 1309 const PetscInt *rows; 1310 PetscMPIInt size; 1311 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1312 PetscErrorCode ierr; 1313 1314 PetscFunctionBegin; 1315 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1316 /* make a collective version of 'rowp' */ 1317 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1318 if (pcomm==comm) { 1319 crowp = rowp; 1320 } else { 1321 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1322 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1323 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1324 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1325 } 1326 /* collect the global row permutation and invert it */ 1327 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1328 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1329 if (pcomm!=comm) { 1330 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1331 } 1332 ierr = 
ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1333 /* get the local target indices */ 1334 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1335 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1336 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1337 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1338 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1339 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1340 /* the column permutation is so much easier; 1341 make a local version of 'colp' and invert it */ 1342 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1343 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1344 if (size==1) { 1345 lcolp = colp; 1346 } else { 1347 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1348 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1349 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1350 } 1351 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1352 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1353 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1354 if (size>1) { 1355 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1356 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1357 } 1358 /* now we just get the submatrix */ 1359 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1360 /* clean up */ 1361 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1362 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1368 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1369 { 1370 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1371 Mat A = mat->A,B = mat->B; 1372 PetscErrorCode ierr; 1373 PetscReal isend[5],irecv[5]; 1374 1375 PetscFunctionBegin; 1376 info->block_size = 1.0; 1377 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1378 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1379 isend[3] = info->memory; isend[4] = info->mallocs; 1380 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1381 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1382 isend[3] += info->memory; isend[4] += info->mallocs; 1383 if (flag == MAT_LOCAL) { 1384 info->nz_used = isend[0]; 1385 info->nz_allocated = isend[1]; 1386 info->nz_unneeded = isend[2]; 1387 info->memory = isend[3]; 1388 info->mallocs = isend[4]; 1389 } else if (flag == MAT_GLOBAL_MAX) { 1390 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1391 info->nz_used = irecv[0]; 1392 info->nz_allocated = irecv[1]; 1393 info->nz_unneeded = irecv[2]; 1394 info->memory = irecv[3]; 1395 info->mallocs = irecv[4]; 1396 } else if (flag == MAT_GLOBAL_SUM) { 1397 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1398 info->nz_used = irecv[0]; 1399 info->nz_allocated = irecv[1]; 1400 info->nz_unneeded = irecv[2]; 1401 info->memory = irecv[3]; 1402 info->mallocs = irecv[4]; 1403 } 1404 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1405 info->fill_ratio_needed = 0; 1406 info->factor_mallocs = 0; 1407 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatSetOption_MPIAIJ" 1413 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1414 { 1415 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1416 PetscErrorCode ierr; 1417 1418 PetscFunctionBegin; 1419 
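  /*
     Most options below are simply forwarded to the sequential diagonal (A) and
     off-diagonal (B) blocks. As a caller-side sketch (the matrix handle "mat"
     here is a made-up example, not taken from this file):

         ierr = MatSetOption(mat,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

     with which each process keeps only the MatSetValues() entries for rows it
     owns instead of stashing the rest for communication during assembly (the
     donotstash flag set below).
  */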
switch (op) { 1420 case MAT_NEW_NONZERO_LOCATIONS: 1421 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1422 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1423 case MAT_KEEP_NONZERO_PATTERN: 1424 case MAT_NEW_NONZERO_LOCATION_ERR: 1425 case MAT_USE_INODES: 1426 case MAT_IGNORE_ZERO_ENTRIES: 1427 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1428 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1429 break; 1430 case MAT_ROW_ORIENTED: 1431 a->roworiented = flg; 1432 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1433 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1434 break; 1435 case MAT_NEW_DIAGONALS: 1436 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1437 break; 1438 case MAT_IGNORE_OFF_PROC_ENTRIES: 1439 a->donotstash = PETSC_TRUE; 1440 break; 1441 case MAT_SPD: 1442 A->spd_set = PETSC_TRUE; 1443 A->spd = flg; 1444 if (flg) { 1445 A->symmetric = PETSC_TRUE; 1446 A->structurally_symmetric = PETSC_TRUE; 1447 A->symmetric_set = PETSC_TRUE; 1448 A->structurally_symmetric_set = PETSC_TRUE; 1449 } 1450 break; 1451 case MAT_SYMMETRIC: 1452 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1453 break; 1454 case MAT_STRUCTURALLY_SYMMETRIC: 1455 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1456 break; 1457 case MAT_HERMITIAN: 1458 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1459 break; 1460 case MAT_SYMMETRY_ETERNAL: 1461 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1462 break; 1463 default: 1464 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1465 } 1466 PetscFunctionReturn(0); 1467 } 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatGetRow_MPIAIJ" 1471 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1472 { 1473 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1474 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1475 PetscErrorCode ierr; 1476 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1477 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1478 PetscInt *cmap,*idx_p; 1479 1480 PetscFunctionBegin; 1481 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1482 mat->getrowactive = PETSC_TRUE; 1483 1484 if (!mat->rowvalues && (idx || v)) { 1485 /* 1486 allocate enough space to hold information from the longest row. 
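       The bound used is the widest row counting both blocks, i.e.
       the maximum over i of (Aa->i[i+1]-Aa->i[i]) + (Ba->i[i+1]-Ba->i[i]);
       the resulting buffers mat->rowvalues and mat->rowindices are allocated once
       and reused by every subsequent MatGetRow() call on this matrix.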
1487 */ 1488 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1489 PetscInt max = 1,tmp; 1490 for (i=0; i<matin->rmap->n; i++) { 1491 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1492 if (max < tmp) { max = tmp; } 1493 } 1494 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1495 } 1496 1497 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1498 lrow = row - rstart; 1499 1500 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1501 if (!v) {pvA = 0; pvB = 0;} 1502 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1503 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1504 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1505 nztot = nzA + nzB; 1506 1507 cmap = mat->garray; 1508 if (v || idx) { 1509 if (nztot) { 1510 /* Sort by increasing column numbers, assuming A and B already sorted */ 1511 PetscInt imark = -1; 1512 if (v) { 1513 *v = v_p = mat->rowvalues; 1514 for (i=0; i<nzB; i++) { 1515 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1516 else break; 1517 } 1518 imark = i; 1519 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1520 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1521 } 1522 if (idx) { 1523 *idx = idx_p = mat->rowindices; 1524 if (imark > -1) { 1525 for (i=0; i<imark; i++) { 1526 idx_p[i] = cmap[cworkB[i]]; 1527 } 1528 } else { 1529 for (i=0; i<nzB; i++) { 1530 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1531 else break; 1532 } 1533 imark = i; 1534 } 1535 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1536 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1537 } 1538 } else { 1539 if (idx) *idx = 0; 1540 if (v) *v = 0; 1541 } 1542 } 1543 *nz = nztot; 1544 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1545 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1546 PetscFunctionReturn(0); 1547 } 1548 1549 #undef __FUNCT__ 1550 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1551 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1552 { 1553 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1554 1555 PetscFunctionBegin; 1556 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1557 aij->getrowactive = PETSC_FALSE; 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "MatNorm_MPIAIJ" 1563 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1564 { 1565 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1566 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1567 PetscErrorCode ierr; 1568 PetscInt i,j,cstart = mat->cmap->rstart; 1569 PetscReal sum = 0.0; 1570 MatScalar *v; 1571 1572 PetscFunctionBegin; 1573 if (aij->size == 1) { 1574 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1575 } else { 1576 if (type == NORM_FROBENIUS) { 1577 v = amat->a; 1578 for (i=0; i<amat->nz; i++) { 1579 #if defined(PETSC_USE_COMPLEX) 1580 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1581 #else 1582 sum += (*v)*(*v); v++; 1583 #endif 1584 } 1585 v = bmat->a; 1586 for (i=0; i<bmat->nz; i++) { 1587 #if defined(PETSC_USE_COMPLEX) 1588 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1589 #else 1590 sum += (*v)*(*v); v++; 1591 #endif 1592 } 1593 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1594 *norm = sqrt(*norm); 1595 } else if (type 
== NORM_1) { /* max column norm */ 1596 PetscReal *tmp,*tmp2; 1597 PetscInt *jj,*garray = aij->garray; 1598 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1599 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1600 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1601 *norm = 0.0; 1602 v = amat->a; jj = amat->j; 1603 for (j=0; j<amat->nz; j++) { 1604 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1605 } 1606 v = bmat->a; jj = bmat->j; 1607 for (j=0; j<bmat->nz; j++) { 1608 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1609 } 1610 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1611 for (j=0; j<mat->cmap->N; j++) { 1612 if (tmp2[j] > *norm) *norm = tmp2[j]; 1613 } 1614 ierr = PetscFree(tmp);CHKERRQ(ierr); 1615 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1616 } else if (type == NORM_INFINITY) { /* max row norm */ 1617 PetscReal ntemp = 0.0; 1618 for (j=0; j<aij->A->rmap->n; j++) { 1619 v = amat->a + amat->i[j]; 1620 sum = 0.0; 1621 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1622 sum += PetscAbsScalar(*v); v++; 1623 } 1624 v = bmat->a + bmat->i[j]; 1625 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1626 sum += PetscAbsScalar(*v); v++; 1627 } 1628 if (sum > ntemp) ntemp = sum; 1629 } 1630 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1631 } else { 1632 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1633 } 1634 } 1635 PetscFunctionReturn(0); 1636 } 1637 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "MatTranspose_MPIAIJ" 1640 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1641 { 1642 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1643 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1644 PetscErrorCode ierr; 1645 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1646 PetscInt cstart=A->cmap->rstart,ncol; 1647 Mat B; 1648 MatScalar *array; 1649 1650 PetscFunctionBegin; 1651 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1652 1653 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1654 ai = Aloc->i; aj = Aloc->j; 1655 bi = Bloc->i; bj = Bloc->j; 1656 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1657 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1658 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1659 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1660 for (i=0; i<ai[ma]; i++){ 1661 d_nnz[aj[i]] ++; 1662 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1663 } 1664 1665 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1666 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1667 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1668 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1669 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1670 } else { 1671 B = *matout; 1672 } 1673 1674 /* copy over the A part */ 1675 array = Aloc->a; 1676 row = A->rmap->rstart; 1677 for (i=0; i<ma; i++) { 1678 ncol = ai[i+1]-ai[i]; 1679 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1680 row++; array += ncol; aj += ncol; 1681 } 1682 aj = Aloc->j; 1683 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1684 1685 /* copy over the B part */ 1686 ierr = 
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1687 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1688 array = Bloc->a; 1689 row = A->rmap->rstart; 1690 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1691 cols_tmp = cols; 1692 for (i=0; i<mb; i++) { 1693 ncol = bi[i+1]-bi[i]; 1694 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1695 row++; array += ncol; cols_tmp += ncol; 1696 } 1697 ierr = PetscFree(cols);CHKERRQ(ierr); 1698 1699 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1701 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1702 *matout = B; 1703 } else { 1704 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1705 } 1706 PetscFunctionReturn(0); 1707 } 1708 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1711 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1712 { 1713 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1714 Mat a = aij->A,b = aij->B; 1715 PetscErrorCode ierr; 1716 PetscInt s1,s2,s3; 1717 1718 PetscFunctionBegin; 1719 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1720 if (rr) { 1721 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1722 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1723 /* Overlap communication with computation. */ 1724 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1725 } 1726 if (ll) { 1727 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1728 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1729 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1730 } 1731 /* scale the diagonal block */ 1732 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1733 1734 if (rr) { 1735 /* Do a scatter end and then right scale the off-diagonal block */ 1736 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1737 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1738 } 1739 1740 PetscFunctionReturn(0); 1741 } 1742 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1745 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1746 { 1747 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1748 PetscErrorCode ierr; 1749 1750 PetscFunctionBegin; 1751 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1752 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1753 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1754 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1755 PetscFunctionReturn(0); 1756 } 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1759 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1760 { 1761 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1762 PetscErrorCode ierr; 1763 1764 PetscFunctionBegin; 1765 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1766 PetscFunctionReturn(0); 1767 } 1768 1769 #undef __FUNCT__ 1770 #define __FUNCT__ "MatEqual_MPIAIJ" 1771 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1772 { 1773 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1774 Mat a,b,c,d; 1775 PetscTruth flg; 1776 PetscErrorCode ierr; 1777 1778 PetscFunctionBegin; 1779 a = matA->A; b = matA->B; 1780 c = matB->A; d = matB->B; 1781 1782 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1783 if (flg) { 1784 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1785 } 1786 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1787 
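  /* The MPI_LAND reduction ensures every rank returns the same answer even when the
     local blocks differ on only one process.  A minimal usage sketch (A and B are
     hypothetical matrices with identical parallel layouts):

        PetscTruth eq;
        ierr = MatEqual(A,B,&eq);CHKERRQ(ierr);
        if (!eq) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_INCOMP,"Matrices differ");
  */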
PetscFunctionReturn(0); 1788 } 1789 1790 #undef __FUNCT__ 1791 #define __FUNCT__ "MatCopy_MPIAIJ" 1792 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1793 { 1794 PetscErrorCode ierr; 1795 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1796 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1797 1798 PetscFunctionBegin; 1799 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1800 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1801 /* because of the column compression in the off-processor part of the matrix a->B, 1802 the number of columns in a->B and b->B may be different, hence we cannot call 1803 the MatCopy() directly on the two parts. If need be, we can provide a more 1804 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1805 then copying the submatrices */ 1806 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1807 } else { 1808 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1809 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1810 } 1811 PetscFunctionReturn(0); 1812 } 1813 1814 #undef __FUNCT__ 1815 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1816 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1817 { 1818 PetscErrorCode ierr; 1819 1820 PetscFunctionBegin; 1821 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1822 PetscFunctionReturn(0); 1823 } 1824 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatAXPY_MPIAIJ" 1827 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1828 { 1829 PetscErrorCode ierr; 1830 PetscInt i; 1831 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1832 PetscBLASInt bnz,one=1; 1833 Mat_SeqAIJ *x,*y; 1834 1835 PetscFunctionBegin; 1836 if (str == SAME_NONZERO_PATTERN) { 1837 PetscScalar alpha = a; 1838 x = (Mat_SeqAIJ *)xx->A->data; 1839 y = (Mat_SeqAIJ *)yy->A->data; 1840 bnz = PetscBLASIntCast(x->nz); 1841 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1842 x = (Mat_SeqAIJ *)xx->B->data; 1843 y = (Mat_SeqAIJ *)yy->B->data; 1844 bnz = PetscBLASIntCast(x->nz); 1845 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1846 } else if (str == SUBSET_NONZERO_PATTERN) { 1847 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1848 1849 x = (Mat_SeqAIJ *)xx->B->data; 1850 y = (Mat_SeqAIJ *)yy->B->data; 1851 if (y->xtoy && y->XtoY != xx->B) { 1852 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1853 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1854 } 1855 if (!y->xtoy) { /* get xtoy */ 1856 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1857 y->XtoY = xx->B; 1858 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1859 } 1860 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1861 } else { 1862 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1863 } 1864 PetscFunctionReturn(0); 1865 } 1866 1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatConjugate_MPIAIJ" 1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1872 { 1873 #if defined(PETSC_USE_COMPLEX) 1874 PetscErrorCode ierr; 1875 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1876 1877 PetscFunctionBegin; 1878 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1879 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1880 #else 1881 PetscFunctionBegin; 1882 #endif 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatRealPart_MPIAIJ" 1888 PetscErrorCode 
MatRealPart_MPIAIJ(Mat A) 1889 { 1890 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1891 PetscErrorCode ierr; 1892 1893 PetscFunctionBegin; 1894 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1895 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1901 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1902 { 1903 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1904 PetscErrorCode ierr; 1905 1906 PetscFunctionBegin; 1907 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1908 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 #ifdef PETSC_HAVE_PBGL 1913 1914 #include <boost/parallel/mpi/bsp_process_group.hpp> 1915 #include <boost/graph/distributed/ilu_default_graph.hpp> 1916 #include <boost/graph/distributed/ilu_0_block.hpp> 1917 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1918 #include <boost/graph/distributed/petsc/interface.hpp> 1919 #include <boost/multi_array.hpp> 1920 #include <boost/parallel/distributed_property_map.hpp> 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1924 /* 1925 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1926 */ 1927 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1928 { 1929 namespace petsc = boost::distributed::petsc; 1930 1931 namespace graph_dist = boost::graph::distributed; 1932 using boost::graph::distributed::ilu_default::process_group_type; 1933 using boost::graph::ilu_permuted; 1934 1935 PetscTruth row_identity, col_identity; 1936 PetscContainer c; 1937 PetscInt m, n, M, N; 1938 PetscErrorCode ierr; 1939 1940 PetscFunctionBegin; 1941 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1942 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1943 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1944 if (!row_identity || !col_identity) { 1945 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1946 } 1947 1948 process_group_type pg; 1949 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1950 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1951 lgraph_type& level_graph = *lgraph_p; 1952 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1953 1954 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1955 ilu_permuted(level_graph); 1956 1957 /* put together the new matrix */ 1958 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1959 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1960 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1961 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1962 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1963 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1964 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1965 1966 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 1967 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 1968 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1974 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1975 { 1976 PetscFunctionBegin; 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #undef __FUNCT__ 1981
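/* The symbolic factorization above stores the PBGL level graph in a PetscContainer
   composed on the factored matrix under the key "graph"; MatSolve_MPIAIJ() below
   retrieves it with the standard query pattern.  A minimal sketch of that pattern
   (fact denotes a hypothetical factored matrix):

      PetscContainer c;
      lgraph_type    *lgraph_p;
      ierr = PetscObjectQuery((PetscObject)fact,"graph",(PetscObject*)&c);CHKERRQ(ierr);
      ierr = PetscContainerGetPointer(c,(void**)&lgraph_p);CHKERRQ(ierr);
*/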
#define __FUNCT__ "MatSolve_MPIAIJ" 1982 /* 1983 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1984 */ 1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1986 { 1987 namespace graph_dist = boost::graph::distributed; 1988 1989 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1990 lgraph_type* lgraph_p; 1991 PetscContainer c; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1996 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1997 ierr = VecCopy(b, x);CHKERRQ(ierr); 1998 1999 PetscScalar* array_x; 2000 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2001 PetscInt sx; 2002 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2003 2004 PetscScalar* array_b; 2005 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2006 PetscInt sb; 2007 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2008 2009 lgraph_type& level_graph = *lgraph_p; 2010 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2011 2012 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2013 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2014 ref_x(array_x, boost::extents[num_vertices(graph)]); 2015 2016 typedef boost::iterator_property_map<array_ref_type::iterator, 2017 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2018 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2019 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2020 2021 ilu_set_solve(*lgraph_p, vector_b, vector_x); ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); 2022 2023 PetscFunctionReturn(0); 2024 } 2025 #endif 2026 2027 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2028 PetscInt nzlocal,nsends,nrecvs; 2029 PetscMPIInt *send_rank,*recv_rank; 2030 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2031 PetscScalar *sbuf_a,**rbuf_a; 2032 PetscErrorCode (*MatDestroy)(Mat); 2033 } Mat_Redundant; 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2038 { 2039 PetscErrorCode ierr; 2040 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2041 PetscInt i; 2042 2043 PetscFunctionBegin; 2044 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2045 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2046 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2047 for (i=0; i<redund->nrecvs; i++){ 2048 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2049 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2050 } 2051 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2052 ierr = PetscFree(redund);CHKERRQ(ierr); 2053 PetscFunctionReturn(0); 2054 } 2055 2056 #undef __FUNCT__ 2057 #define __FUNCT__ "MatDestroy_MatRedundant" 2058 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2059 { 2060 PetscErrorCode ierr; 2061 PetscContainer container; 2062 Mat_Redundant *redund=PETSC_NULL; 2063 2064 PetscFunctionBegin; 2065 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2066 if (container) { 2067 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2068 } else { 2069 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2070 } 2071 A->ops->destroy = redund->MatDestroy; 2072 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2073 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2074 ierr =
PetscContainerDestroy(container);CHKERRQ(ierr); 2075 PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2081 { 2082 PetscMPIInt rank,size; 2083 MPI_Comm comm=((PetscObject)mat)->comm; 2084 PetscErrorCode ierr; 2085 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2086 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2087 PetscInt *rowrange=mat->rmap->range; 2088 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2089 Mat A=aij->A,B=aij->B,C=*matredundant; 2090 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2091 PetscScalar *sbuf_a; 2092 PetscInt nzlocal=a->nz+b->nz; 2093 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2094 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2095 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2096 MatScalar *aworkA,*aworkB; 2097 PetscScalar *vals; 2098 PetscMPIInt tag1,tag2,tag3,imdex; 2099 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2100 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2101 MPI_Status recv_status,*send_status; 2102 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2103 PetscInt **rbuf_j=PETSC_NULL; 2104 PetscScalar **rbuf_a=PETSC_NULL; 2105 Mat_Redundant *redund=PETSC_NULL; 2106 PetscContainer container; 2107 2108 PetscFunctionBegin; 2109 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2110 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2111 2112 if (reuse == MAT_REUSE_MATRIX) { 2113 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2114 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2115 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2116 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2117 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2118 if (container) { 2119 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2120 } else { 2121 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2122 } 2123 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2124 2125 nsends = redund->nsends; 2126 nrecvs = redund->nrecvs; 2127 send_rank = redund->send_rank; 2128 recv_rank = redund->recv_rank; 2129 sbuf_nz = redund->sbuf_nz; 2130 rbuf_nz = redund->rbuf_nz; 2131 sbuf_j = redund->sbuf_j; 2132 sbuf_a = redund->sbuf_a; 2133 rbuf_j = redund->rbuf_j; 2134 rbuf_a = redund->rbuf_a; 2135 } 2136 2137 if (reuse == MAT_INITIAL_MATRIX){ 2138 PetscMPIInt subrank,subsize; 2139 PetscInt nleftover,np_subcomm; 2140 /* get the destination processors' id send_rank, nsends and nrecvs */ 2141 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2142 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2143 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2144 np_subcomm = size/nsubcomm; 2145 nleftover = size - nsubcomm*np_subcomm; 2146 nsends = 0; nrecvs = 0; 2147 for (i=0; i<size; i++){ /* i=rank*/ 2148 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2149 send_rank[nsends] = i; nsends++; 2150 recv_rank[nrecvs++] = i; 2151 } 2152 } 2153 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2154 i = size-nleftover-1; 2155 j = 0; 2156 while (j < nsubcomm - nleftover){ 2157 send_rank[nsends++] = i; 2158 i--; j++; 2159 } 2160 } 2161 2162 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2163 for (i=0; i<nleftover; i++){ 2164 recv_rank[nrecvs++] = size-nleftover+i; 2165 } 2166 } 2167 2168 /* allocate sbuf_j, sbuf_a */ 2169 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2170 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2171 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2172 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2173 2174 /* copy mat's local entries into the buffers */ 2175 if (reuse == MAT_INITIAL_MATRIX){ 2176 rownz_max = 0; 2177 rptr = sbuf_j; 2178 cols = sbuf_j + rend-rstart + 1; 2179 vals = sbuf_a; 2180 rptr[0] = 0; 2181 for (i=0; i<rend-rstart; i++){ 2182 row = i + rstart; 2183 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2184 ncols = nzA + nzB; 2185 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2186 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2187 /* load the column indices for this row into cols */ 2188 lwrite = 0; 2189 for (l=0; l<nzB; l++) { 2190 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2191 vals[lwrite] = aworkB[l]; 2192 cols[lwrite++] = ctmp; 2193 } 2194 } 2195 for (l=0; l<nzA; l++){ 2196 vals[lwrite] = aworkA[l]; 2197 cols[lwrite++] = cstart + cworkA[l]; 2198 } 2199 for (l=0; l<nzB; l++) { 2200 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2201 vals[lwrite] = aworkB[l]; 2202 cols[lwrite++] = ctmp; 2203 } 2204 } 2205 vals += ncols; 2206 cols += ncols; 2207 rptr[i+1] = rptr[i] + ncols; 2208 if (rownz_max < ncols) rownz_max = ncols; 2209 } 2210 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2211 } else { /* only copy matrix values into sbuf_a */ 2212 rptr = sbuf_j; 2213 vals = sbuf_a; 2214 rptr[0] = 0; 2215 for (i=0; i<rend-rstart; i++){ 2216 row = i + rstart; 2217 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2218 ncols = nzA + nzB; 2219 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2220 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2221 lwrite = 0; 2222 for (l=0; l<nzB; l++) { 2223 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2224 } 2225 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2226 for 
(l=0; l<nzB; l++) { 2227 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2228 } 2229 vals += ncols; 2230 rptr[i+1] = rptr[i] + ncols; 2231 } 2232 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2233 2234 /* send nzlocal to others, and recv other's nzlocal */ 2235 /*--------------------------------------------------*/ 2236 if (reuse == MAT_INITIAL_MATRIX){ 2237 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2238 s_waits2 = s_waits3 + nsends; 2239 s_waits1 = s_waits2 + nsends; 2240 r_waits1 = s_waits1 + nsends; 2241 r_waits2 = r_waits1 + nrecvs; 2242 r_waits3 = r_waits2 + nrecvs; 2243 } else { 2244 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2245 r_waits3 = s_waits3 + nsends; 2246 } 2247 2248 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2249 if (reuse == MAT_INITIAL_MATRIX){ 2250 /* get new tags to keep the communication clean */ 2251 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2252 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2253 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2254 2255 /* post receives of other's nzlocal */ 2256 for (i=0; i<nrecvs; i++){ 2257 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2258 } 2259 /* send nzlocal to others */ 2260 for (i=0; i<nsends; i++){ 2261 sbuf_nz[i] = nzlocal; 2262 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2263 } 2264 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2265 count = nrecvs; 2266 while (count) { 2267 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2268 recv_rank[imdex] = recv_status.MPI_SOURCE; 2269 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2270 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2271 2272 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2273 rbuf_nz[imdex] += i + 2; 2274 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2275 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2276 count--; 2277 } 2278 /* wait on sends of nzlocal */ 2279 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2280 /* send mat->i,j to others, and recv from other's */ 2281 /*------------------------------------------------*/ 2282 for (i=0; i<nsends; i++){ 2283 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2284 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2285 } 2286 /* wait on receives of mat->i,j */ 2287 /*------------------------------*/ 2288 count = nrecvs; 2289 while (count) { 2290 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2291 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2292 count--; 2293 } 2294 /* wait on sends of mat->i,j */ 2295 /*---------------------------*/ 2296 if (nsends) { 2297 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2298 } 2299 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2300 2301 /* post receives, send and receive mat->a */ 2302 
/*----------------------------------------*/ 2303 for (imdex=0; imdex<nrecvs; imdex++) { 2304 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2305 } 2306 for (i=0; i<nsends; i++){ 2307 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2308 } 2309 count = nrecvs; 2310 while (count) { 2311 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2312 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2313 count--; 2314 } 2315 if (nsends) { 2316 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2317 } 2318 2319 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2320 2321 /* create redundant matrix */ 2322 /*-------------------------*/ 2323 if (reuse == MAT_INITIAL_MATRIX){ 2324 /* compute rownz_max for preallocation */ 2325 for (imdex=0; imdex<nrecvs; imdex++){ 2326 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2327 rptr = rbuf_j[imdex]; 2328 for (i=0; i<j; i++){ 2329 ncols = rptr[i+1] - rptr[i]; 2330 if (rownz_max < ncols) rownz_max = ncols; 2331 } 2332 } 2333 2334 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2335 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2336 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2337 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2338 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2339 } else { 2340 C = *matredundant; 2341 } 2342 2343 /* insert local matrix entries */ 2344 rptr = sbuf_j; 2345 cols = sbuf_j + rend-rstart + 1; 2346 vals = sbuf_a; 2347 for (i=0; i<rend-rstart; i++){ 2348 row = i + rstart; 2349 ncols = rptr[i+1] - rptr[i]; 2350 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2351 vals += ncols; 2352 cols += ncols; 2353 } 2354 /* insert received matrix entries */ 2355 for (imdex=0; imdex<nrecvs; imdex++){ 2356 rstart = rowrange[recv_rank[imdex]]; 2357 rend = rowrange[recv_rank[imdex]+1]; 2358 rptr = rbuf_j[imdex]; 2359 cols = rbuf_j[imdex] + rend-rstart + 1; 2360 vals = rbuf_a[imdex]; 2361 for (i=0; i<rend-rstart; i++){ 2362 row = i + rstart; 2363 ncols = rptr[i+1] - rptr[i]; 2364 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2365 vals += ncols; 2366 cols += ncols; 2367 } 2368 } 2369 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2370 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2371 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2372 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2373 if (reuse == MAT_INITIAL_MATRIX){ 2374 PetscContainer container; 2375 *matredundant = C; 2376 /* create a supporting struct and attach it to C for reuse */ 2377 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2378 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2379 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2380 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2381 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2382 2383 redund->nzlocal = nzlocal; 2384 redund->nsends = nsends; 2385 redund->nrecvs = nrecvs; 2386 redund->send_rank = send_rank; 2387 redund->recv_rank = recv_rank; 
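  /* Cache the communication pattern and the send/receive buffers on C so that a later
     call with MAT_REUSE_MATRIX can skip the setup and index exchange and only re-send
     the numerical values.  A minimal sketch of the intended calling sequence
     (nsubcomm, subcomm and mlocal are hypothetical values set up by the caller):

        ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
        (change the numerical values of mat, keeping its nonzero pattern)
        ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  */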
2388 redund->sbuf_nz = sbuf_nz; 2389 redund->rbuf_nz = rbuf_nz; 2390 redund->sbuf_j = sbuf_j; 2391 redund->sbuf_a = sbuf_a; 2392 redund->rbuf_j = rbuf_j; 2393 redund->rbuf_a = rbuf_a; 2394 2395 redund->MatDestroy = C->ops->destroy; 2396 C->ops->destroy = MatDestroy_MatRedundant; 2397 } 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2404 { 2405 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2406 PetscErrorCode ierr; 2407 PetscInt i,*idxb = 0; 2408 PetscScalar *va,*vb; 2409 Vec vtmp; 2410 2411 PetscFunctionBegin; 2412 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2413 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2414 if (idx) { 2415 for (i=0; i<A->rmap->n; i++) { 2416 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2417 } 2418 } 2419 2420 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2421 if (idx) { 2422 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2423 } 2424 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2425 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2426 2427 for (i=0; i<A->rmap->n; i++){ 2428 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2429 va[i] = vb[i]; 2430 if (idx) idx[i] = a->garray[idxb[i]]; 2431 } 2432 } 2433 2434 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2435 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2436 if (idxb) { 2437 ierr = PetscFree(idxb);CHKERRQ(ierr); 2438 } 2439 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 #undef __FUNCT__ 2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2446 { 2447 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2448 PetscErrorCode ierr; 2449 PetscInt i,*idxb = 0; 2450 PetscScalar *va,*vb; 2451 Vec vtmp; 2452 2453 PetscFunctionBegin; 2454 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2455 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2456 if (idx) { 2457 for (i=0; i<A->rmap->n; i++) { 2458 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2459 } 2460 } 2461 2462 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2463 if (idx) { 2464 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2465 } 2466 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2467 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2468 2469 for (i=0; i<A->rmap->n; i++){ 2470 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2471 va[i] = vb[i]; 2472 if (idx) idx[i] = a->garray[idxb[i]]; 2473 } 2474 } 2475 2476 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2477 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2478 if (idxb) { 2479 ierr = PetscFree(idxb);CHKERRQ(ierr); 2480 } 2481 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2488 { 2489 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2490 PetscInt n = A->rmap->n; 2491 PetscInt cstart = A->cmap->rstart; 2492 PetscInt *cmap = mat->garray; 2493 PetscInt *diagIdx, *offdiagIdx; 2494 Vec diagV, offdiagV; 2495 PetscScalar *a, *diagA, *offdiagA; 2496 PetscInt r; 2497 PetscErrorCode ierr; 2498 2499 PetscFunctionBegin; 2500 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2501 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2502 ierr = VecCreateSeq(((PetscObject)A)->comm, n,
&offdiagV);CHKERRQ(ierr); 2503 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2504 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2505 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2506 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2507 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2508 for(r = 0; r < n; ++r) { 2509 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2510 a[r] = diagA[r]; 2511 idx[r] = cstart + diagIdx[r]; 2512 } else { 2513 a[r] = offdiagA[r]; 2514 idx[r] = cmap[offdiagIdx[r]]; 2515 } 2516 } 2517 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2518 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2519 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2520 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2521 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2522 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2529 { 2530 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2531 PetscInt n = A->rmap->n; 2532 PetscInt cstart = A->cmap->rstart; 2533 PetscInt *cmap = mat->garray; 2534 PetscInt *diagIdx, *offdiagIdx; 2535 Vec diagV, offdiagV; 2536 PetscScalar *a, *diagA, *offdiagA; 2537 PetscInt r; 2538 PetscErrorCode ierr; 2539 2540 PetscFunctionBegin; 2541 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2542 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2543 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2544 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2545 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2546 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2547 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2548 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2549 for(r = 0; r < n; ++r) { 2550 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2551 a[r] = diagA[r]; 2552 idx[r] = cstart + diagIdx[r]; 2553 } else { 2554 a[r] = offdiagA[r]; 2555 idx[r] = cmap[offdiagIdx[r]]; 2556 } 2557 } 2558 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2559 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2560 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2561 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2562 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2563 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2564 PetscFunctionReturn(0); 2565 } 2566 2567 #undef __FUNCT__ 2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2570 { 2571 PetscErrorCode ierr; 2572 Mat *dummy; 2573 2574 PetscFunctionBegin; 2575 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2576 *newmat = *dummy; 2577 ierr = PetscFree(dummy);CHKERRQ(ierr); 2578 PetscFunctionReturn(0); 2579 } 2580 2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2582 /* -------------------------------------------------------------------*/ 2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2584 MatGetRow_MPIAIJ, 2585 MatRestoreRow_MPIAIJ, 2586 MatMult_MPIAIJ, 2587 /* 4*/ MatMultAdd_MPIAIJ, 2588 MatMultTranspose_MPIAIJ, 2589 MatMultTransposeAdd_MPIAIJ, 2590 #ifdef PETSC_HAVE_PBGL 2591 MatSolve_MPIAIJ, 2592 #else 2593 0, 2594 #endif 2595 0, 2596 0, 2597 /*10*/ 0, 2598 0, 2599 0, 2600 
MatSOR_MPIAIJ, 2601 MatTranspose_MPIAIJ, 2602 /*15*/ MatGetInfo_MPIAIJ, 2603 MatEqual_MPIAIJ, 2604 MatGetDiagonal_MPIAIJ, 2605 MatDiagonalScale_MPIAIJ, 2606 MatNorm_MPIAIJ, 2607 /*20*/ MatAssemblyBegin_MPIAIJ, 2608 MatAssemblyEnd_MPIAIJ, 2609 MatSetOption_MPIAIJ, 2610 MatZeroEntries_MPIAIJ, 2611 /*24*/ MatZeroRows_MPIAIJ, 2612 0, 2613 #ifdef PETSC_HAVE_PBGL 2614 0, 2615 #else 2616 0, 2617 #endif 2618 0, 2619 0, 2620 /*29*/ MatSetUpPreallocation_MPIAIJ, 2621 #ifdef PETSC_HAVE_PBGL 2622 0, 2623 #else 2624 0, 2625 #endif 2626 0, 2627 0, 2628 0, 2629 /*34*/ MatDuplicate_MPIAIJ, 2630 0, 2631 0, 2632 0, 2633 0, 2634 /*39*/ MatAXPY_MPIAIJ, 2635 MatGetSubMatrices_MPIAIJ, 2636 MatIncreaseOverlap_MPIAIJ, 2637 MatGetValues_MPIAIJ, 2638 MatCopy_MPIAIJ, 2639 /*44*/ MatGetRowMax_MPIAIJ, 2640 MatScale_MPIAIJ, 2641 0, 2642 0, 2643 0, 2644 /*49*/ MatSetBlockSize_MPIAIJ, 2645 0, 2646 0, 2647 0, 2648 0, 2649 /*54*/ MatFDColoringCreate_MPIAIJ, 2650 0, 2651 MatSetUnfactored_MPIAIJ, 2652 MatPermute_MPIAIJ, 2653 0, 2654 /*59*/ MatGetSubMatrix_MPIAIJ, 2655 MatDestroy_MPIAIJ, 2656 MatView_MPIAIJ, 2657 0, 2658 0, 2659 /*64*/ 0, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2665 MatGetRowMinAbs_MPIAIJ, 2666 0, 2667 MatSetColoring_MPIAIJ, 2668 #if defined(PETSC_HAVE_ADIC) 2669 MatSetValuesAdic_MPIAIJ, 2670 #else 2671 0, 2672 #endif 2673 MatSetValuesAdifor_MPIAIJ, 2674 /*75*/ MatFDColoringApply_AIJ, 2675 0, 2676 0, 2677 0, 2678 0, 2679 /*80*/ 0, 2680 0, 2681 0, 2682 /*83*/ MatLoad_MPIAIJ, 2683 0, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2689 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2690 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2691 MatPtAP_Basic, 2692 MatPtAPSymbolic_MPIAIJ, 2693 /*94*/ MatPtAPNumeric_MPIAIJ, 2694 0, 2695 0, 2696 0, 2697 0, 2698 /*99*/ 0, 2699 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2700 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2701 MatConjugate_MPIAIJ, 2702 0, 2703 /*104*/MatSetValuesRow_MPIAIJ, 2704 MatRealPart_MPIAIJ, 2705 MatImaginaryPart_MPIAIJ, 2706 0, 2707 0, 2708 /*109*/0, 2709 MatGetRedundantMatrix_MPIAIJ, 2710 MatGetRowMin_MPIAIJ, 2711 0, 2712 0, 2713 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2714 0, 2715 0, 2716 0, 2717 0, 2718 /*119*/0, 2719 0, 2720 0, 2721 0 2722 }; 2723 2724 /* ----------------------------------------------------------------------------------------*/ 2725 2726 EXTERN_C_BEGIN 2727 #undef __FUNCT__ 2728 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2729 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2730 { 2731 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2732 PetscErrorCode ierr; 2733 2734 PetscFunctionBegin; 2735 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2736 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2737 PetscFunctionReturn(0); 2738 } 2739 EXTERN_C_END 2740 2741 EXTERN_C_BEGIN 2742 #undef __FUNCT__ 2743 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2744 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2745 { 2746 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2747 PetscErrorCode ierr; 2748 2749 PetscFunctionBegin; 2750 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2751 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2752 PetscFunctionReturn(0); 2753 } 2754 EXTERN_C_END 2755 2756 EXTERN_C_BEGIN 2757 #undef __FUNCT__ 2758 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2759 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2760 { 2761 Mat_MPIAIJ *b; 2762 PetscErrorCode ierr; 2763 PetscInt i; 2764 2765 PetscFunctionBegin; 
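  /* d_nz/d_nnz preallocate the diagonal block (columns owned by this process) and
     o_nz/o_nnz the off-diagonal block; either a single count per row or an array of
     per-row counts may be given.  A minimal usage sketch for at most 5 nonzeros per
     row in the diagonal block and 2 elsewhere (B is the matrix being preallocated):

        ierr = MatMPIAIJSetPreallocation(B,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  */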
2766 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2767 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2768 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2769 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2770 2771 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2772 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2773 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2774 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2775 if (d_nnz) { 2776 for (i=0; i<B->rmap->n; i++) { 2777 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2778 } 2779 } 2780 if (o_nnz) { 2781 for (i=0; i<B->rmap->n; i++) { 2782 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2783 } 2784 } 2785 b = (Mat_MPIAIJ*)B->data; 2786 2787 if (!B->preallocated) { 2788 /* Explicitly create 2 MATSEQAIJ matrices. */ 2789 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2790 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2791 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2792 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2793 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2794 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2795 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2796 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2797 } 2798 2799 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2800 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2801 B->preallocated = PETSC_TRUE; 2802 PetscFunctionReturn(0); 2803 } 2804 EXTERN_C_END 2805 2806 #undef __FUNCT__ 2807 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2808 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2809 { 2810 Mat mat; 2811 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2812 PetscErrorCode ierr; 2813 2814 PetscFunctionBegin; 2815 *newmat = 0; 2816 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2817 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2818 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2819 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2820 a = (Mat_MPIAIJ*)mat->data; 2821 2822 mat->factortype = matin->factortype; 2823 mat->rmap->bs = matin->rmap->bs; 2824 mat->assembled = PETSC_TRUE; 2825 mat->insertmode = NOT_SET_VALUES; 2826 mat->preallocated = PETSC_TRUE; 2827 2828 a->size = oldmat->size; 2829 a->rank = oldmat->rank; 2830 a->donotstash = oldmat->donotstash; 2831 a->roworiented = oldmat->roworiented; 2832 a->rowindices = 0; 2833 a->rowvalues = 0; 2834 a->getrowactive = PETSC_FALSE; 2835 2836 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2837 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2838 2839 if (oldmat->colmap) { 2840 #if defined (PETSC_USE_CTABLE) 2841 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2842 #else 2843 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2844 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2845 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2846 
#endif 2847 } else a->colmap = 0; 2848 if (oldmat->garray) { 2849 PetscInt len; 2850 len = oldmat->B->cmap->n; 2851 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2852 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2853 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2854 } else a->garray = 0; 2855 2856 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2857 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2858 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2859 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2860 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2861 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2862 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2863 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2864 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2865 *newmat = mat; 2866 PetscFunctionReturn(0); 2867 } 2868 2869 /* 2870 Allows sending/receiving messages larger than 2 gigabytes in a single call by splitting them into chunks of at most CHUNKSIZE entries, since MPI message counts are limited to the range of int 2871 */ 2872 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm) 2873 { 2874 int ierr; 2875 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 2876 PetscInt i,numchunks; 2877 PetscMPIInt icnt; 2878 2879 numchunks = cnt/CHUNKSIZE + 1; 2880 for (i=0; i<numchunks; i++) { 2881 icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 2882 ierr = MPI_Send(mess,icnt,type,to,tag,comm); 2883 if (type == MPIU_INT) { 2884 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 2885 } else if (type == MPIU_SCALAR) { 2886 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 2887 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 2888 } 2889 return 0; 2890 } 2891 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm) 2892 { 2893 int ierr; 2894 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 2895 MPI_Status status; 2896 PetscInt i,numchunks; 2897 PetscMPIInt icnt; 2898 2899 numchunks = cnt/CHUNKSIZE + 1; 2900 for (i=0; i<numchunks; i++) { 2901 icnt = PetscMPIIntCast((i < numchunks-1) ?
CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 2902 ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status); 2903 if (type == MPIU_INT) { 2904 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 2905 } else if (type == MPIU_SCALAR) { 2906 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 2907 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 2908 } 2909 return 0; 2910 } 2911 2912 #undef __FUNCT__ 2913 #define __FUNCT__ "MatLoad_MPIAIJ" 2914 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2915 { 2916 PetscScalar *vals,*svals; 2917 MPI_Comm comm = ((PetscObject)viewer)->comm; 2918 PetscErrorCode ierr; 2919 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2920 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 2921 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2922 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2923 PetscInt cend,cstart,n,*rowners,sizesset=1; 2924 int fd; 2925 2926 PetscFunctionBegin; 2927 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2928 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2929 if (!rank) { 2930 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2931 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2932 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2933 } 2934 2935 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 2936 2937 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2938 M = header[1]; N = header[2]; 2939 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2940 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 2941 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 2942 2943 /* If global sizes are set, check if they are consistent with that given in the file */ 2944 if (sizesset) { 2945 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 2946 } 2947 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2948 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2949 2950 /* determine ownership of all rows */ 2951 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 2952 else m = newMat->rmap->n; /* Set by user */ 2953 2954 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2955 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2956 2957 /* First process needs enough room for process with most rows */ 2958 if (!rank) { 2959 mmax = rowners[1]; 2960 for (i=2; i<size; i++) { 2961 mmax = PetscMax(mmax,rowners[i]); 2962 } 2963 } else mmax = m; 2964 2965 rowners[0] = 0; 2966 for (i=2; i<=size; i++) { 2967 rowners[i] += rowners[i-1]; 2968 } 2969 rstart = rowners[rank]; 2970 rend = rowners[rank+1]; 2971 2972 /* distribute row lengths to all processors */ 2973 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2974 if (!rank) { 2975 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2976 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2977 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2978 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2979 for (j=0; 
j<m; j++) { 2980 procsnz[0] += ourlens[j]; 2981 } 2982 for (i=1; i<size; i++) { 2983 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2984 /* calculate the number of nonzeros on each processor */ 2985 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2986 procsnz[i] += rowlengths[j]; 2987 } 2988 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2989 } 2990 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2991 } else { 2992 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2993 } 2994 2995 if (!rank) { 2996 /* determine max buffer needed and allocate it */ 2997 maxnz = 0; 2998 for (i=0; i<size; i++) { 2999 maxnz = PetscMax(maxnz,procsnz[i]); 3000 } 3001 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3002 3003 /* read in my part of the matrix column indices */ 3004 nz = procsnz[0]; 3005 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3006 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3007 3008 /* read in every one elses and ship off */ 3009 for (i=1; i<size; i++) { 3010 nz = procsnz[i]; 3011 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3012 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3013 } 3014 ierr = PetscFree(cols);CHKERRQ(ierr); 3015 } else { 3016 /* determine buffer space needed for message */ 3017 nz = 0; 3018 for (i=0; i<m; i++) { 3019 nz += ourlens[i]; 3020 } 3021 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3022 3023 /* receive message of column indices*/ 3024 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3025 } 3026 3027 /* determine column ownership if matrix is not square */ 3028 if (N != M) { 3029 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3030 else n = newMat->cmap->n; 3031 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3032 cstart = cend - n; 3033 } else { 3034 cstart = rstart; 3035 cend = rend; 3036 n = cend - cstart; 3037 } 3038 3039 /* loop over local rows, determining number of off diagonal entries */ 3040 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3041 jj = 0; 3042 for (i=0; i<m; i++) { 3043 for (j=0; j<ourlens[i]; j++) { 3044 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3045 jj++; 3046 } 3047 } 3048 3049 for (i=0; i<m; i++) { 3050 ourlens[i] -= offlens[i]; 3051 } 3052 if (!sizesset) { 3053 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3054 } 3055 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3056 3057 for (i=0; i<m; i++) { 3058 ourlens[i] += offlens[i]; 3059 } 3060 3061 if (!rank) { 3062 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3063 3064 /* read in my part of the matrix numerical values */ 3065 nz = procsnz[0]; 3066 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3067 3068 /* insert into matrix */ 3069 jj = rstart; 3070 smycols = mycols; 3071 svals = vals; 3072 for (i=0; i<m; i++) { 3073 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3074 smycols += ourlens[i]; 3075 svals += ourlens[i]; 3076 jj++; 3077 } 3078 3079 /* read in other processors and ship out */ 3080 for (i=1; i<size; i++) { 3081 nz = procsnz[i]; 3082 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3083 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3084 } 3085 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3086 } else { 3087 /* receive numeric values */ 3088 ierr 
= PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3089 3090 /* receive message of values*/ 3091 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3092 3093 /* insert into matrix */ 3094 jj = rstart; 3095 smycols = mycols; 3096 svals = vals; 3097 for (i=0; i<m; i++) { 3098 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3099 smycols += ourlens[i]; 3100 svals += ourlens[i]; 3101 jj++; 3102 } 3103 } 3104 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3105 ierr = PetscFree(vals);CHKERRQ(ierr); 3106 ierr = PetscFree(mycols);CHKERRQ(ierr); 3107 ierr = PetscFree(rowners);CHKERRQ(ierr); 3108 3109 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3110 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3111 PetscFunctionReturn(0); 3112 } 3113 3114 #undef __FUNCT__ 3115 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3116 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3117 { 3118 PetscErrorCode ierr; 3119 IS iscol_local; 3120 PetscInt csize; 3121 3122 PetscFunctionBegin; 3123 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3124 if (call == MAT_REUSE_MATRIX) { 3125 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3126 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3127 } else { 3128 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3129 } 3130 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3131 if (call == MAT_INITIAL_MATRIX) { 3132 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3133 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3134 } 3135 PetscFunctionReturn(0); 3136 } 3137 3138 #undef __FUNCT__ 3139 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3140 /* 3141 Not great since it makes two copies of the submatrix, first an SeqAIJ 3142 in local and then by concatenating the local matrices the end result. 3143 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3144 3145 Note: This requires a sequential iscol with all indices. 
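   A caller would typically satisfy this by gathering the distributed column IS first, as
   MatGetSubMatrix_MPIAIJ() above does. A minimal sketch using the same calls:

      ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
      ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
      ierr = ISDestroy(iscol_local);CHKERRQ(ierr);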
3146 */ 3147 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3148 { 3149 PetscErrorCode ierr; 3150 PetscMPIInt rank,size; 3151 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3152 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3153 Mat *local,M,Mreuse; 3154 MatScalar *vwork,*aa; 3155 MPI_Comm comm = ((PetscObject)mat)->comm; 3156 Mat_SeqAIJ *aij; 3157 3158 3159 PetscFunctionBegin; 3160 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3161 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3162 3163 if (call == MAT_REUSE_MATRIX) { 3164 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3165 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3166 local = &Mreuse; 3167 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3168 } else { 3169 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3170 Mreuse = *local; 3171 ierr = PetscFree(local);CHKERRQ(ierr); 3172 } 3173 3174 /* 3175 m - number of local rows 3176 n - number of columns (same on all processors) 3177 rstart - first row in new global matrix generated 3178 */ 3179 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3180 if (call == MAT_INITIAL_MATRIX) { 3181 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3182 ii = aij->i; 3183 jj = aij->j; 3184 3185 /* 3186 Determine the number of non-zeros in the diagonal and off-diagonal 3187 portions of the matrix in order to do correct preallocation 3188 */ 3189 3190 /* first get start and end of "diagonal" columns */ 3191 if (csize == PETSC_DECIDE) { 3192 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3193 if (mglobal == n) { /* square matrix */ 3194 nlocal = m; 3195 } else { 3196 nlocal = n/size + ((n % size) > rank); 3197 } 3198 } else { 3199 nlocal = csize; 3200 } 3201 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3202 rstart = rend - nlocal; 3203 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3204 3205 /* next, compute all the lengths */ 3206 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3207 olens = dlens + m; 3208 for (i=0; i<m; i++) { 3209 jend = ii[i+1] - ii[i]; 3210 olen = 0; 3211 dlen = 0; 3212 for (j=0; j<jend; j++) { 3213 if (*jj < rstart || *jj >= rend) olen++; 3214 else dlen++; 3215 jj++; 3216 } 3217 olens[i] = olen; 3218 dlens[i] = dlen; 3219 } 3220 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3221 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3222 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3223 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3224 ierr = PetscFree(dlens);CHKERRQ(ierr); 3225 } else { 3226 PetscInt ml,nl; 3227 3228 M = *newmat; 3229 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3230 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3231 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3232 /* 3233 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3234 rather than the slower MatSetValues(). 
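      (M here was assembled by an earlier MAT_INITIAL_MATRIX call; presumably flipping the two
      flags lets the insertion loop below push values straight into the existing nonzero
      structure via the type-specific routine, without the extra assembly-state bookkeeping
      that MatSetValues() would perform.)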
3235 */ 3236 M->was_assembled = PETSC_TRUE; 3237 M->assembled = PETSC_FALSE; 3238 } 3239 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3240 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3241 ii = aij->i; 3242 jj = aij->j; 3243 aa = aij->a; 3244 for (i=0; i<m; i++) { 3245 row = rstart + i; 3246 nz = ii[i+1] - ii[i]; 3247 cwork = jj; jj += nz; 3248 vwork = aa; aa += nz; 3249 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3250 } 3251 3252 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3253 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3254 *newmat = M; 3255 3256 /* save submatrix used in processor for next request */ 3257 if (call == MAT_INITIAL_MATRIX) { 3258 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3259 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3260 } 3261 3262 PetscFunctionReturn(0); 3263 } 3264 3265 EXTERN_C_BEGIN 3266 #undef __FUNCT__ 3267 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3268 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3269 { 3270 PetscInt m,cstart, cend,j,nnz,i,d; 3271 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3272 const PetscInt *JJ; 3273 PetscScalar *values; 3274 PetscErrorCode ierr; 3275 3276 PetscFunctionBegin; 3277 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3278 3279 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3280 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3281 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3282 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3283 m = B->rmap->n; 3284 cstart = B->cmap->rstart; 3285 cend = B->cmap->rend; 3286 rstart = B->rmap->rstart; 3287 3288 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3289 3290 #if defined(PETSC_USE_DEBUG) 3291 for (i=0; i<m; i++) { 3292 nnz = Ii[i+1]- Ii[i]; 3293 JJ = J + Ii[i]; 3294 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3295 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3296 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3297 } 3298 #endif 3299 3300 for (i=0; i<m; i++) { 3301 nnz = Ii[i+1]- Ii[i]; 3302 JJ = J + Ii[i]; 3303 nnz_max = PetscMax(nnz_max,nnz); 3304 d = 0; 3305 for (j=0; j<nnz; j++) { 3306 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3307 } 3308 d_nnz[i] = d; 3309 o_nnz[i] = nnz - d; 3310 } 3311 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3312 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3313 3314 if (v) values = (PetscScalar*)v; 3315 else { 3316 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3317 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3318 } 3319 3320 for (i=0; i<m; i++) { 3321 ii = i + rstart; 3322 nnz = Ii[i+1]- Ii[i]; 3323 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3324 } 3325 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3326 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3327 3328 if (!v) { 3329 ierr = PetscFree(values);CHKERRQ(ierr); 3330 } 3331 PetscFunctionReturn(0); 3332 } 3333 EXTERN_C_END 3334 3335 #undef __FUNCT__ 3336 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3337 /*@ 3338 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3339 (the default parallel PETSc format). 3340 3341 Collective on MPI_Comm 3342 3343 Input Parameters: 3344 + B - the matrix 3345 . i - the indices into j for the start of each local row (starts with zero) 3346 . j - the column indices for each local row (starts with zero) 3347 - v - optional values in the matrix 3348 3349 Level: developer 3350 3351 Notes: 3352 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3353 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3354 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3355 3356 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3357 3358 The format which is used for the sparse matrix input, is equivalent to a 3359 row-major ordering.. i.e for the following matrix, the input data expected is 3360 as shown: 3361 3362 1 0 0 3363 2 0 3 P0 3364 ------- 3365 4 5 6 P1 3366 3367 Process0 [P0]: rows_owned=[0,1] 3368 i = {0,1,3} [size = nrow+1 = 2+1] 3369 j = {0,0,2} [size = nz = 6] 3370 v = {1,2,3} [size = nz = 6] 3371 3372 Process1 [P1]: rows_owned=[2] 3373 i = {0,3} [size = nrow+1 = 1+1] 3374 j = {0,1,2} [size = nz = 6] 3375 v = {4,5,6} [size = nz = 6] 3376 3377 .keywords: matrix, aij, compressed row, sparse, parallel 3378 3379 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3380 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3381 @*/ 3382 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3383 { 3384 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3385 3386 PetscFunctionBegin; 3387 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3388 if (f) { 3389 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3390 } 3391 PetscFunctionReturn(0); 3392 } 3393 3394 #undef __FUNCT__ 3395 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3396 /*@C 3397 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3398 (the default parallel PETSc format). For good matrix assembly performance 3399 the user should preallocate the matrix storage by setting the parameters 3400 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3401 performance can be increased by more than a factor of 50. 3402 3403 Collective on MPI_Comm 3404 3405 Input Parameters: 3406 + A - the matrix 3407 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3408 (same value is used for all local rows) 3409 . d_nnz - array containing the number of nonzeros in the various rows of the 3410 DIAGONAL portion of the local submatrix (possibly different for each row) 3411 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3412 The size of this array is equal to the number of local rows, i.e 'm'. 
3413 You must leave room for the diagonal entry even if it is zero. 3414 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3415 submatrix (same value is used for all local rows). 3416 - o_nnz - array containing the number of nonzeros in the various rows of the 3417 OFF-DIAGONAL portion of the local submatrix (possibly different for 3418 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3419 structure. The size of this array is equal to the number 3420 of local rows, i.e 'm'. 3421 3422 If the *_nnz parameter is given then the *_nz parameter is ignored 3423 3424 The AIJ format (also called the Yale sparse matrix format or 3425 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3426 storage. The stored row and column indices begin with zero. 3427 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3428 3429 The parallel matrix is partitioned such that the first m0 rows belong to 3430 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3431 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3432 3433 The DIAGONAL portion of the local submatrix of a processor can be defined 3434 as the submatrix which is obtained by extraction the part corresponding to 3435 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3436 first row that belongs to the processor, r2 is the last row belonging to 3437 the this processor, and c1-c2 is range of indices of the local part of a 3438 vector suitable for applying the matrix to. This is an mxn matrix. In the 3439 common case of a square matrix, the row and column ranges are the same and 3440 the DIAGONAL part is also square. The remaining portion of the local 3441 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3442 3443 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3444 3445 You can call MatGetInfo() to get information on how effective the preallocation was; 3446 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3447 You can also run with the option -info and look for messages with the string 3448 malloc in them to see if additional memory allocation was needed. 3449 3450 Example usage: 3451 3452 Consider the following 8x8 matrix with 34 non-zero values, that is 3453 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3454 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3455 as follows: 3456 3457 .vb 3458 1 2 0 | 0 3 0 | 0 4 3459 Proc0 0 5 6 | 7 0 0 | 8 0 3460 9 0 10 | 11 0 0 | 12 0 3461 ------------------------------------- 3462 13 0 14 | 15 16 17 | 0 0 3463 Proc1 0 18 0 | 19 20 21 | 0 0 3464 0 0 0 | 22 23 0 | 24 0 3465 ------------------------------------- 3466 Proc2 25 26 27 | 0 0 28 | 29 0 3467 30 0 0 | 31 32 33 | 0 34 3468 .ve 3469 3470 This can be represented as a collection of submatrices as: 3471 3472 .vb 3473 A B C 3474 D E F 3475 G H I 3476 .ve 3477 3478 Where the submatrices A,B,C are owned by proc0, D,E,F are 3479 owned by proc1, G,H,I are owned by proc2. 3480 3481 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3482 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3483 The 'M','N' parameters are 8,8, and have the same values on all procs. 3484 3485 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3486 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3487 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3488 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3489 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3490 matrix, ans [DF] as another SeqAIJ matrix. 3491 3492 When d_nz, o_nz parameters are specified, d_nz storage elements are 3493 allocated for every row of the local diagonal submatrix, and o_nz 3494 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3495 One way to choose d_nz and o_nz is to use the max nonzerors per local 3496 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3497 In this case, the values of d_nz,o_nz are: 3498 .vb 3499 proc0 : dnz = 2, o_nz = 2 3500 proc1 : dnz = 3, o_nz = 2 3501 proc2 : dnz = 1, o_nz = 4 3502 .ve 3503 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3504 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3505 for proc3. i.e we are using 12+15+10=37 storage locations to store 3506 34 values. 3507 3508 When d_nnz, o_nnz parameters are specified, the storage is specified 3509 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3510 In the above case the values for d_nnz,o_nnz are: 3511 .vb 3512 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3513 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3514 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3515 .ve 3516 Here the space allocated is sum of all the above values i.e 34, and 3517 hence pre-allocation is perfect. 3518 3519 Level: intermediate 3520 3521 .keywords: matrix, aij, compressed row, sparse, parallel 3522 3523 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3524 MPIAIJ, MatGetInfo() 3525 @*/ 3526 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3527 { 3528 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3529 3530 PetscFunctionBegin; 3531 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3532 if (f) { 3533 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3534 } 3535 PetscFunctionReturn(0); 3536 } 3537 3538 #undef __FUNCT__ 3539 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3540 /*@ 3541 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3542 CSR format the local rows. 3543 3544 Collective on MPI_Comm 3545 3546 Input Parameters: 3547 + comm - MPI communicator 3548 . m - number of local rows (Cannot be PETSC_DECIDE) 3549 . n - This value should be the same as the local size used in creating the 3550 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3551 calculated if N is given) For square matrices n is almost always m. 3552 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3553 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3554 . i - row indices 3555 . j - column indices 3556 - a - matrix values 3557 3558 Output Parameter: 3559 . mat - the matrix 3560 3561 Level: intermediate 3562 3563 Notes: 3564 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3565 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3566 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3567 3568 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
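
       A minimal calling sketch (assuming each process has already filled its local CSR arrays
       i, j, and a as in the example below; nlocalrows and Nglobal are placeholder names for the
       local row count and global column count):
.vb
       Mat A;
       ierr = MatCreateMPIAIJWithArrays(comm,nlocalrows,PETSC_DECIDE,PETSC_DETERMINE,Nglobal,i,j,a,&A);CHKERRQ(ierr);
.ve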
3569 3570 The format which is used for the sparse matrix input, is equivalent to a 3571 row-major ordering.. i.e for the following matrix, the input data expected is 3572 as shown: 3573 3574 1 0 0 3575 2 0 3 P0 3576 ------- 3577 4 5 6 P1 3578 3579 Process0 [P0]: rows_owned=[0,1] 3580 i = {0,1,3} [size = nrow+1 = 2+1] 3581 j = {0,0,2} [size = nz = 6] 3582 v = {1,2,3} [size = nz = 6] 3583 3584 Process1 [P1]: rows_owned=[2] 3585 i = {0,3} [size = nrow+1 = 1+1] 3586 j = {0,1,2} [size = nz = 6] 3587 v = {4,5,6} [size = nz = 6] 3588 3589 .keywords: matrix, aij, compressed row, sparse, parallel 3590 3591 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3592 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3593 @*/ 3594 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3595 { 3596 PetscErrorCode ierr; 3597 3598 PetscFunctionBegin; 3599 if (i[0]) { 3600 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3601 } 3602 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3603 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3604 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3605 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3606 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3607 PetscFunctionReturn(0); 3608 } 3609 3610 #undef __FUNCT__ 3611 #define __FUNCT__ "MatCreateMPIAIJ" 3612 /*@C 3613 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3614 (the default parallel PETSc format). For good matrix assembly performance 3615 the user should preallocate the matrix storage by setting the parameters 3616 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3617 performance can be increased by more than a factor of 50. 3618 3619 Collective on MPI_Comm 3620 3621 Input Parameters: 3622 + comm - MPI communicator 3623 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3624 This value should be the same as the local size used in creating the 3625 y vector for the matrix-vector product y = Ax. 3626 . n - This value should be the same as the local size used in creating the 3627 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3628 calculated if N is given) For square matrices n is almost always m. 3629 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3630 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3631 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3632 (same value is used for all local rows) 3633 . d_nnz - array containing the number of nonzeros in the various rows of the 3634 DIAGONAL portion of the local submatrix (possibly different for each row) 3635 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3636 The size of this array is equal to the number of local rows, i.e 'm'. 3637 You must leave room for the diagonal entry even if it is zero. 3638 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3639 submatrix (same value is used for all local rows). 
3640 - o_nnz - array containing the number of nonzeros in the various rows of the 3641 OFF-DIAGONAL portion of the local submatrix (possibly different for 3642 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3643 structure. The size of this array is equal to the number 3644 of local rows, i.e 'm'. 3645 3646 Output Parameter: 3647 . A - the matrix 3648 3649 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3650 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3651 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3652 3653 Notes: 3654 If the *_nnz parameter is given then the *_nz parameter is ignored 3655 3656 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3657 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3658 storage requirements for this matrix. 3659 3660 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3661 processor than it must be used on all processors that share the object for 3662 that argument. 3663 3664 The user MUST specify either the local or global matrix dimensions 3665 (possibly both). 3666 3667 The parallel matrix is partitioned across processors such that the 3668 first m0 rows belong to process 0, the next m1 rows belong to 3669 process 1, the next m2 rows belong to process 2 etc.. where 3670 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3671 values corresponding to [m x N] submatrix. 3672 3673 The columns are logically partitioned with the n0 columns belonging 3674 to 0th partition, the next n1 columns belonging to the next 3675 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3676 3677 The DIAGONAL portion of the local submatrix on any given processor 3678 is the submatrix corresponding to the rows and columns m,n 3679 corresponding to the given processor. i.e diagonal matrix on 3680 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3681 etc. The remaining portion of the local submatrix [m x (N-n)] 3682 constitute the OFF-DIAGONAL portion. The example below better 3683 illustrates this concept. 3684 3685 For a square global matrix we define each processor's diagonal portion 3686 to be its local rows and the corresponding columns (a square submatrix); 3687 each processor's off-diagonal portion encompasses the remainder of the 3688 local matrix (a rectangular submatrix). 3689 3690 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3691 3692 When calling this routine with a single process communicator, a matrix of 3693 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3694 type of communicator, use the construction mechanism: 3695 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3696 3697 By default, this format uses inodes (identical nodes) when possible. 3698 We search for consecutive rows with the same nonzero structure, thereby 3699 reusing matrix information to achieve increased efficiency. 3700 3701 Options Database Keys: 3702 + -mat_no_inode - Do not use inodes 3703 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3704 - -mat_aij_oneindex - Internally use indexing starting at 1 3705 rather than 0. Note that when calling MatSetValues(), 3706 the user still MUST index entries starting at 0! 3707 3708 3709 Example usage: 3710 3711 Consider the following 8x8 matrix with 34 non-zero values, that is 3712 assembled across 3 processors. 
Let us assume that proc0 owns 3 rows, 3713 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3714 as follows: 3715 3716 .vb 3717 1 2 0 | 0 3 0 | 0 4 3718 Proc0 0 5 6 | 7 0 0 | 8 0 3719 9 0 10 | 11 0 0 | 12 0 3720 ------------------------------------- 3721 13 0 14 | 15 16 17 | 0 0 3722 Proc1 0 18 0 | 19 20 21 | 0 0 3723 0 0 0 | 22 23 0 | 24 0 3724 ------------------------------------- 3725 Proc2 25 26 27 | 0 0 28 | 29 0 3726 30 0 0 | 31 32 33 | 0 34 3727 .ve 3728 3729 This can be represented as a collection of submatrices as: 3730 3731 .vb 3732 A B C 3733 D E F 3734 G H I 3735 .ve 3736 3737 The submatrices A,B,C are owned by proc0, D,E,F are 3738 owned by proc1, and G,H,I are owned by proc2. 3739 3740 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3741 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3742 The 'M','N' parameters are 8,8, and have the same values on all procs. 3743 3744 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3745 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3746 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3747 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3748 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 3749 matrix, and [DF] as another SeqAIJ matrix. 3750 3751 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3752 allocated for every row of the local DIAGONAL submatrix, and o_nz 3753 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3754 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local 3755 rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 3756 In this case, the values of d_nz,o_nz are: 3757 .vb 3758 proc0 : d_nz = 2, o_nz = 2 3759 proc1 : d_nz = 3, o_nz = 2 3760 proc2 : d_nz = 1, o_nz = 4 3761 .ve 3762 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3763 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3764 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3765 34 values. 3766 3767 When the d_nnz, o_nnz parameters are specified, the storage is specified 3768 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3769 In the above case the values for d_nnz,o_nnz are: 3770 .vb 3771 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3772 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3773 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3774 .ve 3775 Here the space allocated is the sum of all the above values, i.e. 34, and 3776 hence the preallocation is perfect.
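
   Using the recommended MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() sequence noted
   above, proc0 of this example could build its part as follows (a sketch; d_nnz and o_nnz are
   the proc0 arrays listed above):
.vb
      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve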
3777 3778 Level: intermediate 3779 3780 .keywords: matrix, aij, compressed row, sparse, parallel 3781 3782 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3783 MPIAIJ, MatCreateMPIAIJWithArrays() 3784 @*/ 3785 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3786 { 3787 PetscErrorCode ierr; 3788 PetscMPIInt size; 3789 3790 PetscFunctionBegin; 3791 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3792 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3793 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3794 if (size > 1) { 3795 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3796 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3797 } else { 3798 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3799 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3800 } 3801 PetscFunctionReturn(0); 3802 } 3803 3804 #undef __FUNCT__ 3805 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3806 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3807 { 3808 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3809 3810 PetscFunctionBegin; 3811 *Ad = a->A; 3812 *Ao = a->B; 3813 *colmap = a->garray; 3814 PetscFunctionReturn(0); 3815 } 3816 3817 #undef __FUNCT__ 3818 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3819 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3820 { 3821 PetscErrorCode ierr; 3822 PetscInt i; 3823 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3824 3825 PetscFunctionBegin; 3826 if (coloring->ctype == IS_COLORING_GLOBAL) { 3827 ISColoringValue *allcolors,*colors; 3828 ISColoring ocoloring; 3829 3830 /* set coloring for diagonal portion */ 3831 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3832 3833 /* set coloring for off-diagonal portion */ 3834 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3835 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3836 for (i=0; i<a->B->cmap->n; i++) { 3837 colors[i] = allcolors[a->garray[i]]; 3838 } 3839 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3840 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3841 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3842 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3843 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3844 ISColoringValue *colors; 3845 PetscInt *larray; 3846 ISColoring ocoloring; 3847 3848 /* set coloring for diagonal portion */ 3849 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3850 for (i=0; i<a->A->cmap->n; i++) { 3851 larray[i] = i + A->cmap->rstart; 3852 } 3853 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3854 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3855 for (i=0; i<a->A->cmap->n; i++) { 3856 colors[i] = coloring->colors[larray[i]]; 3857 } 3858 ierr = PetscFree(larray);CHKERRQ(ierr); 3859 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3860 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3861 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3862 3863 /* set coloring for off-diagonal portion */ 3864 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3865 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3866 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3867 for (i=0; i<a->B->cmap->n; i++) { 3868 colors[i] = coloring->colors[larray[i]]; 3869 } 3870 ierr = PetscFree(larray);CHKERRQ(ierr); 3871 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3872 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3873 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3874 } else { 3875 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3876 } 3877 3878 PetscFunctionReturn(0); 3879 } 3880 3881 #if defined(PETSC_HAVE_ADIC) 3882 #undef __FUNCT__ 3883 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3884 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3885 { 3886 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3887 PetscErrorCode ierr; 3888 3889 PetscFunctionBegin; 3890 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3891 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3892 PetscFunctionReturn(0); 3893 } 3894 #endif 3895 3896 #undef __FUNCT__ 3897 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3898 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3899 { 3900 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3901 PetscErrorCode ierr; 3902 3903 PetscFunctionBegin; 3904 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3905 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3906 PetscFunctionReturn(0); 3907 } 3908 3909 #undef __FUNCT__ 3910 #define __FUNCT__ "MatMerge" 3911 /*@ 3912 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3913 matrices from each processor 3914 3915 Collective on MPI_Comm 3916 3917 Input Parameters: 3918 + comm - the communicators the parallel matrix will live on 3919 . inmat - the input sequential matrices 3920 . n - number of local columns (or PETSC_DECIDE) 3921 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3922 3923 Output Parameter: 3924 . outmat - the parallel matrix generated 3925 3926 Level: advanced 3927 3928 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
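
   A typical first call is sketched below (inmat is this process's sequential matrix; note that
   inmat is handed to MatDestroy() by this routine, see the implementation below):
.vb
      Mat C;
      ierr = MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve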
3929 3930 @*/ 3931 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3932 { 3933 PetscErrorCode ierr; 3934 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3935 PetscInt *indx; 3936 PetscScalar *values; 3937 3938 PetscFunctionBegin; 3939 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3940 if (scall == MAT_INITIAL_MATRIX){ 3941 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3942 if (n == PETSC_DECIDE){ 3943 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3944 } 3945 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3946 rstart -= m; 3947 3948 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3949 for (i=0;i<m;i++) { 3950 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3951 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3952 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3953 } 3954 /* This routine will ONLY return MPIAIJ type matrix */ 3955 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3956 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3957 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3958 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3959 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3960 3961 } else if (scall == MAT_REUSE_MATRIX){ 3962 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3963 } else { 3964 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3965 } 3966 3967 for (i=0;i<m;i++) { 3968 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3969 Ii = i + rstart; 3970 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3971 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3972 } 3973 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3974 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3975 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3976 3977 PetscFunctionReturn(0); 3978 } 3979 3980 #undef __FUNCT__ 3981 #define __FUNCT__ "MatFileSplit" 3982 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3983 { 3984 PetscErrorCode ierr; 3985 PetscMPIInt rank; 3986 PetscInt m,N,i,rstart,nnz; 3987 size_t len; 3988 const PetscInt *indx; 3989 PetscViewer out; 3990 char *name; 3991 Mat B; 3992 const PetscScalar *values; 3993 3994 PetscFunctionBegin; 3995 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3996 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3997 /* Should this be the type of the diagonal block of A? 
*/ 3998 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3999 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4000 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4001 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4002 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4003 for (i=0;i<m;i++) { 4004 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4005 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4006 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4007 } 4008 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4009 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4010 4011 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4012 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4013 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4014 sprintf(name,"%s.%d",outfile,rank); 4015 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4016 ierr = PetscFree(name); 4017 ierr = MatView(B,out);CHKERRQ(ierr); 4018 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 4019 ierr = MatDestroy(B);CHKERRQ(ierr); 4020 PetscFunctionReturn(0); 4021 } 4022 4023 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 4024 #undef __FUNCT__ 4025 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4026 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4027 { 4028 PetscErrorCode ierr; 4029 Mat_Merge_SeqsToMPI *merge; 4030 PetscContainer container; 4031 4032 PetscFunctionBegin; 4033 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4034 if (container) { 4035 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4036 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4037 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4038 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4039 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4040 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4041 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4042 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4043 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4044 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4045 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4046 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4047 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4048 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4049 4050 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4051 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4052 } 4053 ierr = PetscFree(merge);CHKERRQ(ierr); 4054 4055 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4056 PetscFunctionReturn(0); 4057 } 4058 4059 #include "../src/mat/utils/freespace.h" 4060 #include "petscbt.h" 4061 4062 #undef __FUNCT__ 4063 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4064 /*@C 4065 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4066 matrices from each processor 4067 4068 Collective on MPI_Comm 4069 4070 Input Parameters: 4071 + comm - the communicators the parallel matrix will live on 4072 . seqmat - the input sequential matrices 4073 . m - number of local rows (or PETSC_DECIDE) 4074 . n - number of local columns (or PETSC_DECIDE) 4075 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4076 4077 Output Parameter: 4078 . mpimat - the parallel matrix generated 4079 4080 Level: advanced 4081 4082 Notes: 4083 The dimensions of the sequential matrix in each processor MUST be the same. 
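
   A typical calling sketch (each process passes its own sequential matrix seqmat):
.vb
      Mat C;
      ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve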
4084 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4085 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4086 @*/ 4087 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4088 { 4089 PetscErrorCode ierr; 4090 MPI_Comm comm=((PetscObject)mpimat)->comm; 4091 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4092 PetscMPIInt size,rank,taga,*len_s; 4093 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4094 PetscInt proc,m; 4095 PetscInt **buf_ri,**buf_rj; 4096 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4097 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4098 MPI_Request *s_waits,*r_waits; 4099 MPI_Status *status; 4100 MatScalar *aa=a->a; 4101 MatScalar **abuf_r,*ba_i; 4102 Mat_Merge_SeqsToMPI *merge; 4103 PetscContainer container; 4104 4105 PetscFunctionBegin; 4106 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4107 4108 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4109 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4110 4111 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4112 if (container) { 4113 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4114 } 4115 bi = merge->bi; 4116 bj = merge->bj; 4117 buf_ri = merge->buf_ri; 4118 buf_rj = merge->buf_rj; 4119 4120 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4121 owners = merge->rowmap->range; 4122 len_s = merge->len_s; 4123 4124 /* send and recv matrix values */ 4125 /*-----------------------------*/ 4126 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4127 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4128 4129 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4130 for (proc=0,k=0; proc<size; proc++){ 4131 if (!len_s[proc]) continue; 4132 i = owners[proc]; 4133 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4134 k++; 4135 } 4136 4137 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4138 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4139 ierr = PetscFree(status);CHKERRQ(ierr); 4140 4141 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4142 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4143 4144 /* insert mat values of mpimat */ 4145 /*----------------------------*/ 4146 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4147 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4148 4149 for (k=0; k<merge->nrecv; k++){ 4150 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4151 nrows = *(buf_ri_k[k]); 4152 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4153 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4154 } 4155 4156 /* set values of ba */ 4157 m = merge->rowmap->n; 4158 for (i=0; i<m; i++) { 4159 arow = owners[rank] + i; 4160 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4161 bnzi = bi[i+1] - bi[i]; 4162 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4163 4164 /* add local non-zero vals of this proc's seqmat into ba */ 4165 anzi = ai[arow+1] - ai[arow]; 4166 aj = a->j + ai[arow]; 4167 aa = a->a + ai[arow]; 4168 nextaj = 0; 4169 for (j=0; nextaj<anzi; j++){ 4170 if (*(bj_i 
+ j) == aj[nextaj]){ /* bcol == acol */ 4171 ba_i[j] += aa[nextaj++]; 4172 } 4173 } 4174 4175 /* add received vals into ba */ 4176 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4177 /* i-th row */ 4178 if (i == *nextrow[k]) { 4179 anzi = *(nextai[k]+1) - *nextai[k]; 4180 aj = buf_rj[k] + *(nextai[k]); 4181 aa = abuf_r[k] + *(nextai[k]); 4182 nextaj = 0; 4183 for (j=0; nextaj<anzi; j++){ 4184 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4185 ba_i[j] += aa[nextaj++]; 4186 } 4187 } 4188 nextrow[k]++; nextai[k]++; 4189 } 4190 } 4191 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4192 } 4193 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4194 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4195 4196 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4197 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4198 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4199 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4200 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4201 PetscFunctionReturn(0); 4202 } 4203 4204 #undef __FUNCT__ 4205 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4206 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4207 { 4208 PetscErrorCode ierr; 4209 Mat B_mpi; 4210 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4211 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4212 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4213 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4214 PetscInt len,proc,*dnz,*onz; 4215 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4216 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4217 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4218 MPI_Status *status; 4219 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4220 PetscBT lnkbt; 4221 Mat_Merge_SeqsToMPI *merge; 4222 PetscContainer container; 4223 4224 PetscFunctionBegin; 4225 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4226 4227 /* make sure it is a PETSc comm */ 4228 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4229 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4230 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4231 4232 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4233 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4234 4235 /* determine row ownership */ 4236 /*---------------------------------------------------------*/ 4237 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4238 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4239 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4240 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4241 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4242 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4243 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4244 4245 m = merge->rowmap->n; 4246 M = merge->rowmap->N; 4247 owners = merge->rowmap->range; 4248 4249 /* determine the number of messages to send, their lengths */ 4250 /*---------------------------------------------------------*/ 4251 len_s = merge->len_s; 4252 4253 len = 0; /* length of buf_si[] */ 4254 merge->nsend = 0; 4255 for (proc=0; proc<size; proc++){ 4256 len_si[proc] = 0; 4257 if (proc == rank){ 4258 len_s[proc] = 0; 4259 } else { 4260 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4261 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4262 } 4263 if (len_s[proc]) { 4264 merge->nsend++; 4265 nrows = 0; 4266 for (i=owners[proc]; i<owners[proc+1]; i++){ 4267 if (ai[i+1] > ai[i]) nrows++; 4268 } 4269 len_si[proc] = 2*(nrows+1); 4270 len += len_si[proc]; 4271 } 4272 } 4273 4274 /* determine the number and length of messages to receive for ij-structure */ 4275 /*-------------------------------------------------------------------------*/ 4276 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4277 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4278 4279 /* post the Irecv of j-structure */ 4280 /*-------------------------------*/ 4281 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4282 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4283 4284 /* post the Isend of j-structure */ 4285 /*--------------------------------*/ 4286 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4287 4288 for (proc=0, k=0; proc<size; proc++){ 4289 if (!len_s[proc]) continue; 4290 i = owners[proc]; 4291 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4292 k++; 4293 } 4294 4295 /* receives and sends of j-structure are complete */ 4296 /*------------------------------------------------*/ 4297 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4298 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4299 4300 /* send and recv i-structure */ 4301 /*---------------------------*/ 4302 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4303 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4304 4305 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4306 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4307 for (proc=0,k=0; proc<size; proc++){ 4308 if (!len_s[proc]) continue; 4309 /* form outgoing message for i-structure: 4310 buf_si[0]: nrows to be sent 4311 [1:nrows]: row index (global) 4312 [nrows+1:2*nrows+1]: i-structure index 4313 */ 4314 /*-------------------------------------------*/ 4315 nrows = len_si[proc]/2 - 1; 4316 buf_si_i = buf_si + nrows+1; 4317 buf_si[0] = nrows; 4318 buf_si_i[0] = 0; 4319 nrows = 0; 4320 for (i=owners[proc]; i<owners[proc+1]; i++){ 4321 anzi = ai[i+1] - ai[i]; 4322 if (anzi) { 4323 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4324 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4325 nrows++; 4326 } 4327 } 4328 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4329 k++; 4330 buf_si += len_si[proc]; 4331 } 4332 4333 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4334 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4335 4336 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4337 for (i=0; i<merge->nrecv; i++){ 4338 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4339 } 4340 4341 ierr = PetscFree(len_si);CHKERRQ(ierr); 4342 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4343 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4344 ierr = 
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4345 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4346 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4347 ierr = PetscFree(status);CHKERRQ(ierr); 4348 4349 /* compute a local seq matrix in each processor */ 4350 /*----------------------------------------------*/ 4351 /* allocate bi array and free space for accumulating nonzero column info */ 4352 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4353 bi[0] = 0; 4354 4355 /* create and initialize a linked list */ 4356 nlnk = N+1; 4357 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4358 4359 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4360 len = 0; 4361 len = ai[owners[rank+1]] - ai[owners[rank]]; 4362 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4363 current_space = free_space; 4364 4365 /* determine symbolic info for each local row */ 4366 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4367 4368 for (k=0; k<merge->nrecv; k++){ 4369 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4370 nrows = *buf_ri_k[k]; 4371 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4372 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 4373 } 4374 4375 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4376 len = 0; 4377 for (i=0;i<m;i++) { 4378 bnzi = 0; 4379 /* add local non-zero cols of this proc's seqmat into lnk */ 4380 arow = owners[rank] + i; 4381 anzi = ai[arow+1] - ai[arow]; 4382 aj = a->j + ai[arow]; 4383 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4384 bnzi += nlnk; 4385 /* add received col data into lnk */ 4386 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4387 if (i == *nextrow[k]) { /* i-th row */ 4388 anzi = *(nextai[k]+1) - *nextai[k]; 4389 aj = buf_rj[k] + *nextai[k]; 4390 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4391 bnzi += nlnk; 4392 nextrow[k]++; nextai[k]++; 4393 } 4394 } 4395 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4396 4397 /* if free space is not available, make more free space */ 4398 if (current_space->local_remaining<bnzi) { 4399 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4400 nspacedouble++; 4401 } 4402 /* copy data into free space, then initialize lnk */ 4403 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4404 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4405 4406 current_space->array += bnzi; 4407 current_space->local_used += bnzi; 4408 current_space->local_remaining -= bnzi; 4409 4410 bi[i+1] = bi[i] + bnzi; 4411 } 4412 4413 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4414 4415 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4416 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4417 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4418 4419 /* create symbolic parallel matrix B_mpi */ 4420 /*---------------------------------------*/ 4421 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4422 if (n==PETSC_DECIDE) { 4423 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4424 } else { 4425 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4426 } 4427 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4428 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4429 ierr =
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4430 4431 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4432 B_mpi->assembled = PETSC_FALSE; 4433 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4434 merge->bi = bi; 4435 merge->bj = bj; 4436 merge->buf_ri = buf_ri; 4437 merge->buf_rj = buf_rj; 4438 merge->coi = PETSC_NULL; 4439 merge->coj = PETSC_NULL; 4440 merge->owners_co = PETSC_NULL; 4441 4442 /* attach the supporting struct to B_mpi for reuse */ 4443 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4444 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4445 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4446 *mpimat = B_mpi; 4447 4448 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4449 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4450 PetscFunctionReturn(0); 4451 } 4452 4453 #undef __FUNCT__ 4454 #define __FUNCT__ "MatMerge_SeqsToMPI" 4455 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4456 { 4457 PetscErrorCode ierr; 4458 4459 PetscFunctionBegin; 4460 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4461 if (scall == MAT_INITIAL_MATRIX){ 4462 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4463 } 4464 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4465 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4466 PetscFunctionReturn(0); 4467 } 4468 4469 #undef __FUNCT__ 4470 #define __FUNCT__ "MatGetLocalMat" 4471 /*@ 4472 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4473 4474 Not Collective 4475 4476 Input Parameters: 4477 + A - the matrix 4478 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4479 4480 Output Parameter: 4481 . 
A_loc - the local sequential matrix generated 4482 4483 Level: developer 4484 4485 @*/ 4486 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4487 { 4488 PetscErrorCode ierr; 4489 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4490 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4491 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4492 MatScalar *aa=a->a,*ba=b->a,*cam; 4493 PetscScalar *ca; 4494 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4495 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4496 4497 PetscFunctionBegin; 4498 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4499 if (scall == MAT_INITIAL_MATRIX){ 4500 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4501 ci[0] = 0; 4502 for (i=0; i<am; i++){ 4503 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4504 } 4505 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4506 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4507 k = 0; 4508 for (i=0; i<am; i++) { 4509 ncols_o = bi[i+1] - bi[i]; 4510 ncols_d = ai[i+1] - ai[i]; 4511 /* off-diagonal portion of A */ 4512 for (jo=0; jo<ncols_o; jo++) { 4513 col = cmap[*bj]; 4514 if (col >= cstart) break; 4515 cj[k] = col; bj++; 4516 ca[k++] = *ba++; 4517 } 4518 /* diagonal portion of A */ 4519 for (j=0; j<ncols_d; j++) { 4520 cj[k] = cstart + *aj++; 4521 ca[k++] = *aa++; 4522 } 4523 /* off-diagonal portion of A */ 4524 for (j=jo; j<ncols_o; j++) { 4525 cj[k] = cmap[*bj++]; 4526 ca[k++] = *ba++; 4527 } 4528 } 4529 /* put together the new matrix */ 4530 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4531 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4532 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4533 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4534 mat->free_a = PETSC_TRUE; 4535 mat->free_ij = PETSC_TRUE; 4536 mat->nonew = 0; 4537 } else if (scall == MAT_REUSE_MATRIX){ 4538 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4539 ci = mat->i; cj = mat->j; cam = mat->a; 4540 for (i=0; i<am; i++) { 4541 /* off-diagonal portion of A */ 4542 ncols_o = bi[i+1] - bi[i]; 4543 for (jo=0; jo<ncols_o; jo++) { 4544 col = cmap[*bj]; 4545 if (col >= cstart) break; 4546 *cam++ = *ba++; bj++; 4547 } 4548 /* diagonal portion of A */ 4549 ncols_d = ai[i+1] - ai[i]; 4550 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4551 /* off-diagonal portion of A */ 4552 for (j=jo; j<ncols_o; j++) { 4553 *cam++ = *ba++; bj++; 4554 } 4555 } 4556 } else { 4557 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4558 } 4559 4560 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4561 PetscFunctionReturn(0); 4562 } 4563 4564 #undef __FUNCT__ 4565 #define __FUNCT__ "MatGetLocalMatCondensed" 4566 /*@C 4567 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4568 4569 Not Collective 4570 4571 Input Parameters: 4572 + A - the matrix 4573 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4574 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4575 4576 Output Parameter: 4577 . 
A_loc - the local sequential matrix generated 4578 4579 Level: developer 4580 4581 @*/ 4582 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4583 { 4584 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4585 PetscErrorCode ierr; 4586 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4587 IS isrowa,iscola; 4588 Mat *aloc; 4589 4590 PetscFunctionBegin; 4591 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4592 if (!row){ 4593 start = A->rmap->rstart; end = A->rmap->rend; 4594 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4595 } else { 4596 isrowa = *row; 4597 } 4598 if (!col){ 4599 start = A->cmap->rstart; 4600 cmap = a->garray; 4601 nzA = a->A->cmap->n; 4602 nzB = a->B->cmap->n; 4603 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4604 ncols = 0; 4605 for (i=0; i<nzB; i++) { 4606 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4607 else break; 4608 } 4609 imark = i; 4610 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4611 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4612 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4613 ierr = PetscFree(idx);CHKERRQ(ierr); 4614 } else { 4615 iscola = *col; 4616 } 4617 if (scall != MAT_INITIAL_MATRIX){ 4618 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4619 aloc[0] = *A_loc; 4620 } 4621 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4622 *A_loc = aloc[0]; 4623 ierr = PetscFree(aloc);CHKERRQ(ierr); 4624 if (!row){ 4625 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4626 } 4627 if (!col){ 4628 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4629 } 4630 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4631 PetscFunctionReturn(0); 4632 } 4633 4634 #undef __FUNCT__ 4635 #define __FUNCT__ "MatGetBrowsOfAcols" 4636 /*@C 4637 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 4638 4639 Collective on Mat 4640 4641 Input Parameters: 4642 + A,B - the matrices in mpiaij format 4643 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4644 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4645 4646 Output Parameter: 4647 + rowb, colb - index sets of rows and columns of B to extract 4648 .
brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's local rows 4649 - B_seq - the sequential matrix generated 4650 4651 Level: developer 4652 4653 @*/ 4654 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4655 { 4656 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4657 PetscErrorCode ierr; 4658 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4659 IS isrowb,iscolb; 4660 Mat *bseq; 4661 4662 PetscFunctionBegin; 4663 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4664 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4665 } 4666 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4667 4668 if (scall == MAT_INITIAL_MATRIX){ 4669 start = A->cmap->rstart; 4670 cmap = a->garray; 4671 nzA = a->A->cmap->n; 4672 nzB = a->B->cmap->n; 4673 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4674 ncols = 0; 4675 for (i=0; i<nzB; i++) { /* row < local row index */ 4676 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4677 else break; 4678 } 4679 imark = i; 4680 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4681 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4682 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4683 ierr = PetscFree(idx);CHKERRQ(ierr); 4684 *brstart = imark; 4685 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4686 } else { 4687 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4688 isrowb = *rowb; iscolb = *colb; 4689 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4690 bseq[0] = *B_seq; 4691 } 4692 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4693 *B_seq = bseq[0]; 4694 ierr = PetscFree(bseq);CHKERRQ(ierr); 4695 if (!rowb){ 4696 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4697 } else { 4698 *rowb = isrowb; 4699 } 4700 if (!colb){ 4701 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4702 } else { 4703 *colb = iscolb; 4704 } 4705 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4706 PetscFunctionReturn(0); 4707 } 4708 4709 #undef __FUNCT__ 4710 #define __FUNCT__ "MatGetBrowsOfAoCols" 4711 /*@C 4712 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 4713 of the OFF-DIAGONAL portion of local A 4714 4715 Collective on Mat 4716 4717 Input Parameters: 4718 + A,B - the matrices in mpiaij format 4719 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4720 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4721 .
startsj_r - similar to startsj for receives 4722 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4723 4724 Output Parameter: 4725 + B_oth - the sequential matrix generated 4726 4727 Level: developer 4728 4729 @*/ 4730 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4731 { 4732 VecScatter_MPI_General *gen_to,*gen_from; 4733 PetscErrorCode ierr; 4734 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4735 Mat_SeqAIJ *b_oth; 4736 VecScatter ctx=a->Mvctx; 4737 MPI_Comm comm=((PetscObject)ctx)->comm; 4738 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4739 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4740 PetscScalar *rvalues,*svalues; 4741 MatScalar *b_otha,*bufa,*bufA; 4742 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4743 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4744 MPI_Status *sstatus,rstatus; 4745 PetscMPIInt jj; 4746 PetscInt *cols,sbs,rbs; 4747 PetscScalar *vals; 4748 4749 PetscFunctionBegin; 4750 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4751 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4752 } 4753 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4754 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4755 4756 gen_to = (VecScatter_MPI_General*)ctx->todata; 4757 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4758 rvalues = gen_from->values; /* holds the length of receiving row */ 4759 svalues = gen_to->values; /* holds the length of sending row */ 4760 nrecvs = gen_from->n; 4761 nsends = gen_to->n; 4762 4763 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4764 srow = gen_to->indices; /* local row index to be sent */ 4765 sstarts = gen_to->starts; 4766 sprocs = gen_to->procs; 4767 sstatus = gen_to->sstatus; 4768 sbs = gen_to->bs; 4769 rstarts = gen_from->starts; 4770 rprocs = gen_from->procs; 4771 rbs = gen_from->bs; 4772 4773 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4774 if (scall == MAT_INITIAL_MATRIX){ 4775 /* i-array */ 4776 /*---------*/ 4777 /* post receives */ 4778 for (i=0; i<nrecvs; i++){ 4779 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4780 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4781 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4782 } 4783 4784 /* pack the outgoing message */ 4785 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4786 sstartsj[0] = 0; rstartsj[0] = 0; 4787 len = 0; /* total length of j or a array to be sent */ 4788 k = 0; 4789 for (i=0; i<nsends; i++){ 4790 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4791 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4792 for (j=0; j<nrows; j++) { 4793 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4794 for (l=0; l<sbs; l++){ 4795 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4796 rowlen[j*sbs+l] = ncols; 4797 len += ncols; 4798 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4799 } 4800 k++; 4801 } 4802 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4803 sstartsj[i+1] = len; /* starting point of (i+1)-th 
outgoing msg in bufj and bufa */ 4804 } 4805 /* recvs and sends of i-array are completed */ 4806 i = nrecvs; 4807 while (i--) { 4808 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4809 } 4810 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4811 4812 /* allocate buffers for sending j and a arrays */ 4813 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4814 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4815 4816 /* create i-array of B_oth */ 4817 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4818 b_othi[0] = 0; 4819 len = 0; /* total length of j or a array to be received */ 4820 k = 0; 4821 for (i=0; i<nrecvs; i++){ 4822 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4823 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4824 for (j=0; j<nrows; j++) { 4825 b_othi[k+1] = b_othi[k] + rowlen[j]; 4826 len += rowlen[j]; k++; 4827 } 4828 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4829 } 4830 4831 /* allocate space for j and a arrays of B_oth */ 4832 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4833 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4834 4835 /* j-array */ 4836 /*---------*/ 4837 /* post receives of j-array */ 4838 for (i=0; i<nrecvs; i++){ 4839 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4840 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4841 } 4842 4843 /* pack the outgoing message j-array */ 4844 k = 0; 4845 for (i=0; i<nsends; i++){ 4846 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4847 bufJ = bufj+sstartsj[i]; 4848 for (j=0; j<nrows; j++) { 4849 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4850 for (ll=0; ll<sbs; ll++){ 4851 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4852 for (l=0; l<ncols; l++){ 4853 *bufJ++ = cols[l]; 4854 } 4855 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4856 } 4857 } 4858 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4859 } 4860 4861 /* recvs and sends of j-array are completed */ 4862 i = nrecvs; 4863 while (i--) { 4864 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4865 } 4866 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4867 } else if (scall == MAT_REUSE_MATRIX){ 4868 sstartsj = *startsj; 4869 rstartsj = *startsj_r; 4870 bufa = *bufa_ptr; 4871 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4872 b_otha = b_oth->a; 4873 } else { 4874 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4875 } 4876 4877 /* a-array */ 4878 /*---------*/ 4879 /* post receives of a-array */ 4880 for (i=0; i<nrecvs; i++){ 4881 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4882 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4883 } 4884 4885 /* pack the outgoing message a-array */ 4886 k = 0; 4887 for (i=0; i<nsends; i++){ 4888 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4889 bufA = bufa+sstartsj[i]; 4890 for (j=0; j<nrows; j++) { 4891 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4892 for (ll=0; ll<sbs; ll++){ 4893 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4894 for (l=0; l<ncols; l++){ 4895 *bufA++ = vals[l]; 4896 } 4897 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4898 } 4899 } 4900 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4901 } 4902 /* recvs and sends of a-array are completed */ 4903 i = nrecvs; 4904 while (i--) { 4905 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4906 } 4907 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4908 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4909 4910 if (scall == MAT_INITIAL_MATRIX){ 4911 /* put together the new matrix */ 4912 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4913 4914 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4915 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4916 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4917 b_oth->free_a = PETSC_TRUE; 4918 b_oth->free_ij = PETSC_TRUE; 4919 b_oth->nonew = 0; 4920 4921 ierr = PetscFree(bufj);CHKERRQ(ierr); 4922 if (!startsj || !bufa_ptr){ 4923 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4924 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4925 } else { 4926 *startsj = sstartsj; 4927 *startsj_r = rstartsj; 4928 *bufa_ptr = bufa; 4929 } 4930 } 4931 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4932 PetscFunctionReturn(0); 4933 } 4934 4935 #undef __FUNCT__ 4936 #define __FUNCT__ "MatGetCommunicationStructs" 4937 /*@C 4938 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4939 4940 Not Collective 4941 4942 Input Parameters: 4943 . A - The matrix in mpiaij format 4944 4945 Output Parameter: 4946 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4947 . 
colmap - A map from global column index to local index into lvec 4948 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4949 4950 Level: developer 4951 4952 @*/ 4953 #if defined (PETSC_USE_CTABLE) 4954 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4955 #else 4956 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4957 #endif 4958 { 4959 Mat_MPIAIJ *a; 4960 4961 PetscFunctionBegin; 4962 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4963 PetscValidPointer(lvec, 2); 4964 PetscValidPointer(colmap, 3); 4965 PetscValidPointer(multScatter, 4); 4966 a = (Mat_MPIAIJ *) A->data; 4967 if (lvec) *lvec = a->lvec; 4968 if (colmap) *colmap = a->colmap; 4969 if (multScatter) *multScatter = a->Mvctx; 4970 PetscFunctionReturn(0); 4971 } 4972 4973 EXTERN_C_BEGIN 4974 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 4975 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 4976 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 4977 EXTERN_C_END 4978 4979 #undef __FUNCT__ 4980 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4981 /* 4982 Computes (B'*A')' since computing B*A directly is untenable 4983 4984 n p p 4985 ( ) ( ) ( ) 4986 m ( A ) * n ( B ) = m ( C ) 4987 ( ) ( ) ( ) 4988 4989 */ 4990 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4991 { 4992 PetscErrorCode ierr; 4993 Mat At,Bt,Ct; 4994 4995 PetscFunctionBegin; 4996 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4997 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4998 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4999 ierr = MatDestroy(At);CHKERRQ(ierr); 5000 ierr = MatDestroy(Bt);CHKERRQ(ierr); 5001 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5002 ierr = MatDestroy(Ct);CHKERRQ(ierr); 5003 PetscFunctionReturn(0); 5004 } 5005 5006 #undef __FUNCT__ 5007 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5008 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5009 { 5010 PetscErrorCode ierr; 5011 PetscInt m=A->rmap->n,n=B->cmap->n; 5012 Mat Cmat; 5013 5014 PetscFunctionBegin; 5015 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5016 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5017 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5018 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5019 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5020 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5021 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5022 *C = Cmat; 5023 PetscFunctionReturn(0); 5024 } 5025 5026 /* ----------------------------------------------------------------*/ 5027 #undef __FUNCT__ 5028 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5029 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5030 { 5031 PetscErrorCode ierr; 5032 5033 PetscFunctionBegin; 5034 if (scall == MAT_INITIAL_MATRIX){ 5035 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5036 } 5037 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5038 PetscFunctionReturn(0); 5039 } 
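/*
   Usage sketch (illustrative only, not part of the library source): the three
   MatMatMult*_MPIDense_MPIAIJ routines above are normally reached through the public
   MatMatMult() interface; they are composed on the MPIAIJ matrix in MatCreate_MPIAIJ()
   below.  Assuming A is MATMPIDENSE, B is MATMPIAIJ and A->cmap->n == B->rmap->n, a
   caller might do

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);

   The numeric routine forms C = A*B as (B'*A')' because a direct dense-times-sparse
   product is not implemented here; this costs two explicit transposes per multiply.
*/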
5040 5041 EXTERN_C_BEGIN 5042 #if defined(PETSC_HAVE_MUMPS) 5043 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5044 #endif 5045 #if defined(PETSC_HAVE_PASTIX) 5046 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5047 #endif 5048 #if defined(PETSC_HAVE_SUPERLU_DIST) 5049 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5050 #endif 5051 #if defined(PETSC_HAVE_SPOOLES) 5052 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5053 #endif 5054 EXTERN_C_END 5055 5056 /*MC 5057 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5058 5059 Options Database Keys: 5060 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5061 5062 Level: beginner 5063 5064 .seealso: MatCreateMPIAIJ() 5065 M*/ 5066 5067 EXTERN_C_BEGIN 5068 #undef __FUNCT__ 5069 #define __FUNCT__ "MatCreate_MPIAIJ" 5070 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5071 { 5072 Mat_MPIAIJ *b; 5073 PetscErrorCode ierr; 5074 PetscMPIInt size; 5075 5076 PetscFunctionBegin; 5077 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5078 5079 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5080 B->data = (void*)b; 5081 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5082 B->rmap->bs = 1; 5083 B->assembled = PETSC_FALSE; 5084 B->mapping = 0; 5085 5086 B->insertmode = NOT_SET_VALUES; 5087 b->size = size; 5088 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5089 5090 /* build cache for off array entries formed */ 5091 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5092 b->donotstash = PETSC_FALSE; 5093 b->colmap = 0; 5094 b->garray = 0; 5095 b->roworiented = PETSC_TRUE; 5096 5097 /* stuff used for matrix vector multiply */ 5098 b->lvec = PETSC_NULL; 5099 b->Mvctx = PETSC_NULL; 5100 5101 /* stuff for MatGetRow() */ 5102 b->rowindices = 0; 5103 b->rowvalues = 0; 5104 b->getrowactive = PETSC_FALSE; 5105 5106 #if defined(PETSC_HAVE_SPOOLES) 5107 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5108 "MatGetFactor_mpiaij_spooles", 5109 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5110 #endif 5111 #if defined(PETSC_HAVE_MUMPS) 5112 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5113 "MatGetFactor_aij_mumps", 5114 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5115 #endif 5116 #if defined(PETSC_HAVE_PASTIX) 5117 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5118 "MatGetFactor_mpiaij_pastix", 5119 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5120 #endif 5121 #if defined(PETSC_HAVE_SUPERLU_DIST) 5122 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5123 "MatGetFactor_mpiaij_superlu_dist", 5124 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5125 #endif 5126 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5127 "MatStoreValues_MPIAIJ", 5128 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5129 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5130 "MatRetrieveValues_MPIAIJ", 5131 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5132 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5133 "MatGetDiagonalBlock_MPIAIJ", 5134 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5135 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5136 "MatIsTranspose_MPIAIJ", 
5137 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5138 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5139 "MatMPIAIJSetPreallocation_MPIAIJ", 5140 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5141 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5142 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5143 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5144 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5145 "MatDiagonalScaleLocal_MPIAIJ", 5146 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5147 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5148 "MatConvert_MPIAIJ_MPIAIJPERM", 5149 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5150 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5151 "MatConvert_MPIAIJ_MPIAIJCRL", 5152 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5153 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5154 "MatConvert_MPIAIJ_MPISBAIJ", 5155 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5156 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5157 "MatMatMult_MPIDense_MPIAIJ", 5158 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5159 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5160 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5161 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5162 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5163 "MatMatMultNumeric_MPIDense_MPIAIJ", 5164 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5165 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5166 PetscFunctionReturn(0); 5167 } 5168 EXTERN_C_END 5169 5170 #undef __FUNCT__ 5171 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5172 /*@ 5173 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5174 and "off-diagonal" part of the matrix in CSR format. 5175 5176 Collective on MPI_Comm 5177 5178 Input Parameters: 5179 + comm - MPI communicator 5180 . m - number of local rows (Cannot be PETSC_DECIDE) 5181 . n - This value should be the same as the local size used in creating the 5182 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5183 calculated if N is given) For square matrices n is almost always m. 5184 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5185 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5186 . i - row indices for "diagonal" portion of matrix 5187 . j - column indices 5188 . a - matrix values 5189 . oi - row indices for "off-diagonal" portion of matrix 5190 . oj - column indices 5191 - oa - matrix values 5192 5193 Output Parameter: 5194 . mat - the matrix 5195 5196 Level: advanced 5197 5198 Notes: 5199 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5200 5201 The i and j indices are 0 based 5202 5203 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5204 5205 This sets local rows and cannot be used to set off-processor values. 5206 5207 You cannot later use MatSetValues() to change values in this matrix. 
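   For illustration only (a hypothetical 4x4 matrix split over two processes, not taken from
   an actual example): the second process, owning rows 2 and 3 and diagonal-block columns 2
   and 3, could pass

      PetscInt    di[] = {0,1,2}, dj[] = {0,1};    diagonal block, LOCAL column indices
      PetscScalar da[] = {1.0,2.0};
      PetscInt    oi[] = {0,1,1}, oj[] = {1};      off-diagonal block, GLOBAL column indices
      PetscScalar oa[] = {5.0};
      Mat         A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                            di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);

   so that its local rows are [. 5 1 .] and [. . . 2], with the "diagonal" columns indexed
   relative to the start of the diagonal block and the "off-diagonal" columns indexed globally.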
5208 5209 .keywords: matrix, aij, compressed row, sparse, parallel 5210 5211 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5212 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5213 @*/ 5214 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5215 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5216 { 5217 PetscErrorCode ierr; 5218 Mat_MPIAIJ *maij; 5219 5220 PetscFunctionBegin; 5221 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5222 if (i[0]) { 5223 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5224 } 5225 if (oi[0]) { 5226 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5227 } 5228 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5229 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5230 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5231 maij = (Mat_MPIAIJ*) (*mat)->data; 5232 maij->donotstash = PETSC_TRUE; 5233 (*mat)->preallocated = PETSC_TRUE; 5234 5235 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5236 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5237 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5238 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5239 5240 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5241 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5242 5243 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5244 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5245 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5246 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5247 5248 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5249 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5250 PetscFunctionReturn(0); 5251 } 5252 5253 /* 5254 Special version for direct calls from Fortran 5255 */ 5256 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5257 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5258 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5259 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5260 #endif 5261 5262 /* Change these macros so can be used in void function */ 5263 #undef CHKERRQ 5264 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5265 #undef SETERRQ2 5266 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5267 #undef SETERRQ 5268 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5269 5270 EXTERN_C_BEGIN 5271 #undef __FUNCT__ 5272 #define __FUNCT__ "matsetvaluesmpiaij_" 5273 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5274 { 5275 Mat mat = *mmat; 5276 PetscInt m = *mm, n = *mn; 5277 InsertMode addv = *maddv; 5278 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5279 PetscScalar value; 5280 PetscErrorCode ierr; 5281 5282 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5283 if (mat->insertmode == NOT_SET_VALUES) { 5284 mat->insertmode = addv; 5285 } 5286 #if defined(PETSC_USE_DEBUG) 5287 else if (mat->insertmode != addv) { 5288 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5289 } 5290 #endif 5291 { 5292 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5293 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5294 PetscTruth roworiented = aij->roworiented; 5295 5296 /* Some Variables required in the macro */ 5297 Mat A = aij->A; 5298 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5299 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5300 MatScalar *aa = a->a; 5301 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5302 Mat B = aij->B; 5303 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5304 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5305 MatScalar *ba = b->a; 5306 5307 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5308 PetscInt nonew = a->nonew; 5309 MatScalar *ap1,*ap2; 5310 5311 PetscFunctionBegin; 5312 for (i=0; i<m; i++) { 5313 if (im[i] < 0) continue; 5314 #if defined(PETSC_USE_DEBUG) 5315 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5316 #endif 5317 if (im[i] >= rstart && im[i] < rend) { 5318 row = im[i] - rstart; 5319 lastcol1 = -1; 5320 rp1 = aj + ai[row]; 5321 ap1 = aa + ai[row]; 5322 rmax1 = aimax[row]; 5323 nrow1 = ailen[row]; 5324 low1 = 0; 5325 high1 = nrow1; 5326 lastcol2 = -1; 5327 rp2 = bj + bi[row]; 5328 ap2 = ba + bi[row]; 5329 rmax2 = bimax[row]; 5330 nrow2 = bilen[row]; 5331 low2 = 0; 5332 high2 = nrow2; 5333 5334 for (j=0; j<n; j++) { 5335 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5336 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5337 if (in[j] >= cstart && in[j] < cend){ 5338 col = in[j] - cstart; 5339 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5340 } else if (in[j] < 0) continue; 5341 #if defined(PETSC_USE_DEBUG) 5342 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5343 #endif 5344 else { 5345 if (mat->was_assembled) { 5346 if (!aij->colmap) { 5347 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5348 } 5349 #if defined (PETSC_USE_CTABLE) 5350 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5351 col--; 5352 #else 5353 col = aij->colmap[in[j]] - 1; 5354 #endif 5355 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5356 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5357 col = in[j]; 5358 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5359 B = aij->B; 5360 b = (Mat_SeqAIJ*)B->data; 5361 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5362 rp2 = bj + bi[row]; 5363 ap2 = ba + bi[row]; 5364 rmax2 = bimax[row]; 5365 nrow2 = bilen[row]; 5366 low2 = 0; 5367 high2 = nrow2; 5368 bm = aij->B->rmap->n; 5369 ba = b->a; 5370 } 5371 } else col = in[j]; 5372 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5373 } 5374 } 5375 } else { 5376 if (!aij->donotstash) { 5377 if (roworiented) { 5378 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5379 } else { 5380 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5381 } 5382 } 5383 } 5384 }} 5385 PetscFunctionReturnVoid(); 5386 } 5387 EXTERN_C_END 5388 5389
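/*
   Usage sketch (illustrative only, not part of the library source): matsetvaluesmpiaij_()
   above is a Fortran-callable fast path that inlines the MatSetValues() logic for a MATMPIAIJ
   matrix: negative row/column indices are skipped, locally owned entries go directly into the
   diagonal (A) and off-diagonal (B) blocks, and off-process entries are stashed unless
   donotstash is set.  From C the same operation is expressed through the standard interface:

     PetscInt    row     = 0;
     PetscInt    cols[2] = {0,1};
     PetscScalar vals[2] = {2.0,-1.0};
     ierr = MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/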