#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

  Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the numbers of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the numbers of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
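/*
   A minimal usage sketch for MatDistribute_MPIAIJ() (illustrative only, not
   part of this file; gseq, mlocal and M are hypothetical names): rank 0
   assembles the whole matrix as a MATSEQAIJ and every rank passes the number
   of rows it should own.

       Mat      gseq = PETSC_NULL,mpimat;
       PetscInt mlocal = PETSC_DECIDE;
       if (!rank) { .... assemble gseq as a square M x M MATSEQAIJ .... }
       ierr = PetscSplitOwnership(comm,&mlocal,&M);CHKERRQ(ierr);
       ierr = MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);

   With MAT_REUSE_MATRIX only the numerical values are moved over; the
   nonzero pattern of *inmat must match the earlier MAT_INITIAL_MATRIX call.
*/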
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  processor has an order N integer array, but access is fast).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col; \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1  = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i]  = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col; \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2  = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i]  = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}
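/*
   Both macros above use the same lookup strategy: each row is kept sorted by
   column, so a bisection loop first narrows the candidate window to at most
   5 entries, a short linear scan then finds the column (or the insertion
   point), and on a miss the tail of the row is shifted up one slot to make
   room. A standalone sketch of the search step, with hypothetical names rp
   (sorted column indices of one row) and nrow (its current length):

       low = 0; high = nrow;
       while (high-low > 5) {
         t = (low+high)/2;
         if (rp[t] > col) high = t;
         else             low  = t;
       }
       for (i=low; i<high; i++) {
         if (rp[i] >= col) break;    .... first index with rp[i] >= col ....
       }
*/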
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
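/*
   MatSetValuesRow_MPIAIJ() above assumes v[] holds every stored value of one
   locally owned row ordered by global column number: the first l values fall
   left of the diagonal block (into B), the next a->i[row+1]-a->i[row] values
   fall inside the diagonal block (into A), and the remainder falls right of
   it (back into B). This works because garray[] is sorted, so B's columns
   appear in increasing global order. A hedged sketch of a matching caller
   (row and vals are hypothetical):

       PetscInt    row = rstart + i;  .... a locally owned row ....
       PetscScalar *vals;             .... nnz(row) entries, sorted by global column ....
       ierr = MatSetValuesRow_MPIAIJ(A,row,vals);CHKERRQ(ierr);
*/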
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscTruth ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
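/*
   Call pattern served by MatSetValues_MPIAIJ() above (illustrative only;
   i, j and val are hypothetical): any process may set any entry, and values
   destined for rows this process does not own are stashed and shipped during
   assembly.

       ierr = MatSetValues(mat,1,&i,1,&j,&val,ADD_VALUES);CHKERRQ(ierr);  .... i may be off-process ....
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Setting MAT_IGNORE_OFF_PROC_ENTRIES disables the stashing path entirely.
*/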
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
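/*
   The MPI_BOR reduction above works because INSERT_VALUES and ADD_VALUES
   have distinct bit patterns: OR-ing the insert modes of all ranks yields
   (ADD_VALUES|INSERT_VALUES) exactly when some ranks inserted while others
   added, and that mixed state is rejected. A rank that cached no values
   still participates, which is why mat->insertmode is refreshed from the
   reduction result afterwards.
*/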
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr); aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
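/*
   MatZeroRows_MPIAIJ() below uses the standard PETSc rendezvous pattern:
   each process bins the global row numbers it was handed by owning process,
   PetscMaxSum() tells every process how many messages to expect and the
   maximum message length, receives are posted before the matching sends,
   and the received row numbers are shifted to local indices before the
   local diagonal (A) and off-diagonal (B) blocks are zeroed.
*/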
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory with many mallocs and slow things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
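/*
   MatMult_MPIAIJ() and MatMultAdd_MPIAIJ() above overlap communication with
   computation: the scatter of the needed ghost entries of xx into lvec is
   started, the diagonal block A (which needs only local entries of xx) is
   applied, the scatter is completed, and the off-diagonal block B then adds
   the ghost contribution, i.e. conceptually

       yy = A_diag * xx_local + B_offdiag * xx_ghost
*/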
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
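/*
   MatView_MPIAIJ_Binary() below emits the standard PETSc binary matrix
   format: a 4-entry header (MAT_FILE_CLASSID, M, N, global nz), then all row
   lengths, then all column indices, then all numerical values, each section
   in process order. Process 0 streams every other process's section through
   a single buffer sized by the largest per-process nz, so no process ever
   holds more than one extra buffer of that size.
*/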
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much buffer space as the largest per-process nz */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);
  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject)mat)->prefix,"-mat_ascii_output_large",&flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
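/*
   Typical use of the viewers dispatched above (illustrative only; v is a
   hypothetical viewer variable):

       ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);   .... ASCII ....
       ierr = PetscViewerBinaryOpen(((PetscObject)mat)->comm,"mat.dat",FILE_MODE_WRITE,&v);CHKERRQ(ierr);
       ierr = MatView(mat,v);CHKERRQ(ierr);                           .... binary ....
       ierr = PetscViewerDestroy(v);CHKERRQ(ierr);

   ASCII output gathers the entire matrix onto process 0, hence the 1024-row
   guard above; binary output streams and is the scalable choice.
*/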
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscTruth     hasop;

  PetscFunctionBegin;
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}
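/*
   MatGetInfo_MPIAIJ() above sums the statistics of the diagonal (A) and
   off-diagonal (B) blocks, then reduces across ranks for MAT_GLOBAL_MAX and
   MAT_GLOBAL_SUM.  Illustrative use (info is a hypothetical variable):

       MatInfo info;
       ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(((PetscObject)mat)->comm,"nz used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/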
switch (op) { 1420 case MAT_NEW_NONZERO_LOCATIONS: 1421 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1422 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1423 case MAT_KEEP_NONZERO_PATTERN: 1424 case MAT_NEW_NONZERO_LOCATION_ERR: 1425 case MAT_USE_INODES: 1426 case MAT_IGNORE_ZERO_ENTRIES: 1427 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1428 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1429 break; 1430 case MAT_ROW_ORIENTED: 1431 a->roworiented = flg; 1432 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1433 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1434 break; 1435 case MAT_NEW_DIAGONALS: 1436 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1437 break; 1438 case MAT_IGNORE_OFF_PROC_ENTRIES: 1439 a->donotstash = PETSC_TRUE; 1440 break; 1441 case MAT_SPD: 1442 A->spd_set = PETSC_TRUE; 1443 A->spd = flg; 1444 if (flg) { 1445 A->symmetric = PETSC_TRUE; 1446 A->structurally_symmetric = PETSC_TRUE; 1447 A->symmetric_set = PETSC_TRUE; 1448 A->structurally_symmetric_set = PETSC_TRUE; 1449 } 1450 break; 1451 case MAT_SYMMETRIC: 1452 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1453 break; 1454 case MAT_STRUCTURALLY_SYMMETRIC: 1455 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1456 break; 1457 case MAT_HERMITIAN: 1458 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1459 break; 1460 case MAT_SYMMETRY_ETERNAL: 1461 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1462 break; 1463 default: 1464 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1465 } 1466 PetscFunctionReturn(0); 1467 } 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatGetRow_MPIAIJ" 1471 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1472 { 1473 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1474 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1475 PetscErrorCode ierr; 1476 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1477 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1478 PetscInt *cmap,*idx_p; 1479 1480 PetscFunctionBegin; 1481 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1482 mat->getrowactive = PETSC_TRUE; 1483 1484 if (!mat->rowvalues && (idx || v)) { 1485 /* 1486 allocate enough space to hold information from the longest row. 
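The bound is the maximum over local rows of the combined row length in the
diagonal (A) and off-diagonal (B) blocks, computed by the loop below, so a
single rowvalues/rowindices pair can serve every later MatGetRow() call.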
1487 */ 1488 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1489 PetscInt max = 1,tmp; 1490 for (i=0; i<matin->rmap->n; i++) { 1491 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1492 if (max < tmp) { max = tmp; } 1493 } 1494 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1495 } 1496 1497 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1498 lrow = row - rstart; 1499 1500 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1501 if (!v) {pvA = 0; pvB = 0;} 1502 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1503 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1504 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1505 nztot = nzA + nzB; 1506 1507 cmap = mat->garray; 1508 if (v || idx) { 1509 if (nztot) { 1510 /* Sort by increasing column numbers, assuming A and B already sorted */ 1511 PetscInt imark = -1; 1512 if (v) { 1513 *v = v_p = mat->rowvalues; 1514 for (i=0; i<nzB; i++) { 1515 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1516 else break; 1517 } 1518 imark = i; 1519 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1520 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1521 } 1522 if (idx) { 1523 *idx = idx_p = mat->rowindices; 1524 if (imark > -1) { 1525 for (i=0; i<imark; i++) { 1526 idx_p[i] = cmap[cworkB[i]]; 1527 } 1528 } else { 1529 for (i=0; i<nzB; i++) { 1530 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1531 else break; 1532 } 1533 imark = i; 1534 } 1535 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1536 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1537 } 1538 } else { 1539 if (idx) *idx = 0; 1540 if (v) *v = 0; 1541 } 1542 } 1543 *nz = nztot; 1544 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1545 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1546 PetscFunctionReturn(0); 1547 } 1548 1549 #undef __FUNCT__ 1550 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1551 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1552 { 1553 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1554 1555 PetscFunctionBegin; 1556 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1557 aij->getrowactive = PETSC_FALSE; 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "MatNorm_MPIAIJ" 1563 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1564 { 1565 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1566 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1567 PetscErrorCode ierr; 1568 PetscInt i,j,cstart = mat->cmap->rstart; 1569 PetscReal sum = 0.0; 1570 MatScalar *v; 1571 1572 PetscFunctionBegin; 1573 if (aij->size == 1) { 1574 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1575 } else { 1576 if (type == NORM_FROBENIUS) { 1577 v = amat->a; 1578 for (i=0; i<amat->nz; i++) { 1579 #if defined(PETSC_USE_COMPLEX) 1580 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1581 #else 1582 sum += (*v)*(*v); v++; 1583 #endif 1584 } 1585 v = bmat->a; 1586 for (i=0; i<bmat->nz; i++) { 1587 #if defined(PETSC_USE_COMPLEX) 1588 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1589 #else 1590 sum += (*v)*(*v); v++; 1591 #endif 1592 } 1593 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1594 *norm = sqrt(*norm); 1595 } else if (type 
== NORM_1) { /* max column norm */ 1596 PetscReal *tmp,*tmp2; 1597 PetscInt *jj,*garray = aij->garray; 1598 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1599 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1600 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1601 *norm = 0.0; 1602 v = amat->a; jj = amat->j; 1603 for (j=0; j<amat->nz; j++) { 1604 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1605 } 1606 v = bmat->a; jj = bmat->j; 1607 for (j=0; j<bmat->nz; j++) { 1608 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1609 } 1610 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1611 for (j=0; j<mat->cmap->N; j++) { 1612 if (tmp2[j] > *norm) *norm = tmp2[j]; 1613 } 1614 ierr = PetscFree(tmp);CHKERRQ(ierr); 1615 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1616 } else if (type == NORM_INFINITY) { /* max row norm */ 1617 PetscReal ntemp = 0.0; 1618 for (j=0; j<aij->A->rmap->n; j++) { 1619 v = amat->a + amat->i[j]; 1620 sum = 0.0; 1621 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1622 sum += PetscAbsScalar(*v); v++; 1623 } 1624 v = bmat->a + bmat->i[j]; 1625 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1626 sum += PetscAbsScalar(*v); v++; 1627 } 1628 if (sum > ntemp) ntemp = sum; 1629 } 1630 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1631 } else { 1632 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1633 } 1634 } 1635 PetscFunctionReturn(0); 1636 } 1637 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "MatTranspose_MPIAIJ" 1640 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1641 { 1642 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1643 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1644 PetscErrorCode ierr; 1645 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1646 PetscInt cstart=A->cmap->rstart,ncol; 1647 Mat B; 1648 MatScalar *array; 1649 1650 PetscFunctionBegin; 1651 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1652 1653 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1654 ai = Aloc->i; aj = Aloc->j; 1655 bi = Bloc->i; bj = Bloc->j; 1656 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1657 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1658 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1659 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1660 for (i=0; i<ai[ma]; i++){ 1661 d_nnz[aj[i]] ++; 1662 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1663 } 1664 1665 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1666 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1667 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1668 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1669 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1670 } else { 1671 B = *matout; 1672 } 1673 1674 /* copy over the A part */ 1675 array = Aloc->a; 1676 row = A->rmap->rstart; 1677 for (i=0; i<ma; i++) { 1678 ncol = ai[i+1]-ai[i]; 1679 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1680 row++; array += ncol; aj += ncol; 1681 } 1682 aj = Aloc->j; 1683 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1684 1685 /* copy over the B part */ 1686 ierr = 
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1687 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1688 array = Bloc->a; 1689 row = A->rmap->rstart; 1690 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1691 cols_tmp = cols; 1692 for (i=0; i<mb; i++) { 1693 ncol = bi[i+1]-bi[i]; 1694 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1695 row++; array += ncol; cols_tmp += ncol; 1696 } 1697 ierr = PetscFree(cols);CHKERRQ(ierr); 1698 1699 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1701 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1702 *matout = B; 1703 } else { 1704 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1705 } 1706 PetscFunctionReturn(0); 1707 } 1708 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1711 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1712 { 1713 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1714 Mat a = aij->A,b = aij->B; 1715 PetscErrorCode ierr; 1716 PetscInt s1,s2,s3; 1717 1718 PetscFunctionBegin; 1719 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1720 if (rr) { 1721 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1722 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1723 /* Overlap communication with computation. */ 1724 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1725 } 1726 if (ll) { 1727 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1728 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1729 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1730 } 1731 /* scale the diagonal block */ 1732 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1733 1734 if (rr) { 1735 /* Do a scatter end and then right scale the off-diagonal block */ 1736 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1737 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1738 } 1739 1740 PetscFunctionReturn(0); 1741 } 1742 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1745 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1746 { 1747 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1748 PetscErrorCode ierr; 1749 1750 PetscFunctionBegin; 1751 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1752 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1753 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1754 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1755 PetscFunctionReturn(0); 1756 } 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1759 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1760 { 1761 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1762 PetscErrorCode ierr; 1763 1764 PetscFunctionBegin; 1765 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1766 PetscFunctionReturn(0); 1767 } 1768 1769 #undef __FUNCT__ 1770 #define __FUNCT__ "MatEqual_MPIAIJ" 1771 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1772 { 1773 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1774 Mat a,b,c,d; 1775 PetscTruth flg; 1776 PetscErrorCode ierr; 1777 1778 PetscFunctionBegin; 1779 a = matA->A; b = matA->B; 1780 c = matB->A; d = matB->B; 1781 1782 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1783 if (flg) { 1784 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1785 } 1786 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1787 
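/*
   The allreduce above combines the per-process comparisons with a logical
   AND, so *flag is PETSC_TRUE only when both blocks match on every process.
   A hedged caller-side sketch (X and Y are assumed to be MPIAIJ matrices on
   the same communicator; the call is collective):

      PetscTruth eq;
      ierr = MatEqual(X,Y,&eq);CHKERRQ(ierr);
*/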
PetscFunctionReturn(0); 1788 } 1789 1790 #undef __FUNCT__ 1791 #define __FUNCT__ "MatCopy_MPIAIJ" 1792 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1793 { 1794 PetscErrorCode ierr; 1795 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1796 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1797 1798 PetscFunctionBegin; 1799 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1800 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1801 /* because of the column compression in the off-processor part of the matrix a->B, 1802 the number of columns in a->B and b->B may be different, hence we cannot call 1803 the MatCopy() directly on the two parts. If need be, we can provide a more 1804 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1805 then copying the submatrices */ 1806 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1807 } else { 1808 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1809 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1810 } 1811 PetscFunctionReturn(0); 1812 } 1813 1814 #undef __FUNCT__ 1815 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1816 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1817 { 1818 PetscErrorCode ierr; 1819 1820 PetscFunctionBegin; 1821 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1822 PetscFunctionReturn(0); 1823 } 1824 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatAXPY_MPIAIJ" 1827 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1828 { 1829 PetscErrorCode ierr; 1830 PetscInt i; 1831 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1832 PetscBLASInt bnz,one=1; 1833 Mat_SeqAIJ *x,*y; 1834 1835 PetscFunctionBegin; 1836 if (str == SAME_NONZERO_PATTERN) { 1837 PetscScalar alpha = a; 1838 x = (Mat_SeqAIJ *)xx->A->data; 1839 y = (Mat_SeqAIJ *)yy->A->data; 1840 bnz = PetscBLASIntCast(x->nz); 1841 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1842 x = (Mat_SeqAIJ *)xx->B->data; 1843 y = (Mat_SeqAIJ *)yy->B->data; 1844 bnz = PetscBLASIntCast(x->nz); 1845 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1846 } else if (str == SUBSET_NONZERO_PATTERN) { 1847 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1848 1849 x = (Mat_SeqAIJ *)xx->B->data; 1850 y = (Mat_SeqAIJ *)yy->B->data; 1851 if (y->xtoy && y->XtoY != xx->B) { 1852 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1853 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1854 } 1855 if (!y->xtoy) { /* get xtoy */ 1856 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1857 y->XtoY = xx->B; 1858 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1859 } 1860 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1861 } else { 1862 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1863 } 1864 PetscFunctionReturn(0); 1865 } 1866 1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatConjugate_MPIAIJ" 1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1872 { 1873 #if defined(PETSC_USE_COMPLEX) 1874 PetscErrorCode ierr; 1875 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1876 1877 PetscFunctionBegin; 1878 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1879 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1880 #else 1881 PetscFunctionBegin; 1882 #endif 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatRealPart_MPIAIJ" 1888 PetscErrorCode 
MatRealPart_MPIAIJ(Mat A) 1889 { 1890 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1891 PetscErrorCode ierr; 1892 1893 PetscFunctionBegin; 1894 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1895 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1901 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1902 { 1903 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1904 PetscErrorCode ierr; 1905 1906 PetscFunctionBegin; 1907 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1908 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 #ifdef PETSC_HAVE_PBGL 1913 1914 #include <boost/parallel/mpi/bsp_process_group.hpp> 1915 #include <boost/graph/distributed/ilu_default_graph.hpp> 1916 #include <boost/graph/distributed/ilu_0_block.hpp> 1917 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1918 #include <boost/graph/distributed/petsc/interface.hpp> 1919 #include <boost/multi_array.hpp> 1920 #include <boost/parallel/distributed_property_map.hpp> 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1924 /* 1925 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1926 */ 1927 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1928 { 1929 namespace petsc = boost::distributed::petsc; 1930 1931 namespace graph_dist = boost::graph::distributed; 1932 using boost::graph::distributed::ilu_default::process_group_type; 1933 using boost::graph::ilu_permuted; 1934 1935 PetscTruth row_identity, col_identity; 1936 PetscContainer c; 1937 PetscInt m, n, M, N; 1938 PetscErrorCode ierr; 1939 1940 PetscFunctionBegin; 1941 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1942 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1943 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1944 if (!row_identity || !col_identity) { 1945 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1946 } 1947 1948 process_group_type pg; 1949 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1950 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1951 lgraph_type& level_graph = *lgraph_p; 1952 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1953 1954 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1955 ilu_permuted(level_graph); 1956 1957 /* put together the new matrix */ 1958 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1959 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1960 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1961 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1962 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1963 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1964 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1965 1966 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 1967 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 1968 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1974 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1975 { 1976 PetscFunctionBegin; 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #undef __FUNCT__ 1981
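/*
   Note on the PBGL solve below (a sketch of the control flow, not new API):
   MatILUFactorSymbolic_MPIAIJ() above composes the boost level graph on the
   factored matrix under the key "graph"; MatSolve_MPIAIJ() queries that same
   key, copies b into x, and hands both arrays to ilu_set_solve() as boost
   property maps. No factor values are stored in the PETSc Mat itself.
*/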
#define __FUNCT__ "MatSolve_MPIAIJ" 1982 /* 1983 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1984 */ 1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1986 { 1987 namespace graph_dist = boost::graph::distributed; 1988 1989 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1990 lgraph_type* lgraph_p; 1991 PetscContainer c; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1996 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1997 ierr = VecCopy(b, x);CHKERRQ(ierr); 1998 1999 PetscScalar* array_x; 2000 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2001 PetscInt sx; 2002 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2003 2004 PetscScalar* array_b; 2005 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2006 PetscInt sb; 2007 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2008 2009 lgraph_type& level_graph = *lgraph_p; 2010 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2011 2012 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2013 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2014 ref_x(array_x, boost::extents[num_vertices(graph)]); 2015 2016 typedef boost::iterator_property_map<array_ref_type::iterator, 2017 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2018 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2019 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2020 2021 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2022 2023 PetscFunctionReturn(0); 2024 } 2025 #endif 2026 2027 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2028 PetscInt nzlocal,nsends,nrecvs; 2029 PetscMPIInt *send_rank,*recv_rank; 2030 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2031 PetscScalar *sbuf_a,**rbuf_a; 2032 PetscErrorCode (*MatDestroy)(Mat); 2033 } Mat_Redundant; 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2038 { 2039 PetscErrorCode ierr; 2040 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2041 PetscInt i; 2042 2043 PetscFunctionBegin; 2044 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2045 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2046 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2047 for (i=0; i<redund->nrecvs; i++){ 2048 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2049 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2050 } 2051 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2052 ierr = PetscFree(redund);CHKERRQ(ierr); 2053 PetscFunctionReturn(0); 2054 } 2055 2056 #undef __FUNCT__ 2057 #define __FUNCT__ "MatDestroy_MatRedundant" 2058 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2059 { 2060 PetscErrorCode ierr; 2061 PetscContainer container; 2062 Mat_Redundant *redund=PETSC_NULL; 2063 2064 PetscFunctionBegin; 2065 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2066 if (container) { 2067 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2068 } else { 2069 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2070 } 2071 A->ops->destroy = redund->MatDestroy; 2072 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2073 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2074 ierr = 
PetscContainerDestroy(container);CHKERRQ(ierr); 2075 PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2081 { 2082 PetscMPIInt rank,size; 2083 MPI_Comm comm=((PetscObject)mat)->comm; 2084 PetscErrorCode ierr; 2085 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2086 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2087 PetscInt *rowrange=mat->rmap->range; 2088 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2089 Mat A=aij->A,B=aij->B,C=*matredundant; 2090 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2091 PetscScalar *sbuf_a; 2092 PetscInt nzlocal=a->nz+b->nz; 2093 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2094 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2095 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2096 MatScalar *aworkA,*aworkB; 2097 PetscScalar *vals; 2098 PetscMPIInt tag1,tag2,tag3,imdex; 2099 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2100 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2101 MPI_Status recv_status,*send_status; 2102 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2103 PetscInt **rbuf_j=PETSC_NULL; 2104 PetscScalar **rbuf_a=PETSC_NULL; 2105 Mat_Redundant *redund=PETSC_NULL; 2106 PetscContainer container; 2107 2108 PetscFunctionBegin; 2109 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2110 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2111 2112 if (reuse == MAT_REUSE_MATRIX) { 2113 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2114 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2115 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2116 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2117 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2118 if (container) { 2119 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2120 } else { 2121 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2122 } 2123 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 2124 2125 nsends = redund->nsends; 2126 nrecvs = redund->nrecvs; 2127 send_rank = redund->send_rank; 2128 recv_rank = redund->recv_rank; 2129 sbuf_nz = redund->sbuf_nz; 2130 rbuf_nz = redund->rbuf_nz; 2131 sbuf_j = redund->sbuf_j; 2132 sbuf_a = redund->sbuf_a; 2133 rbuf_j = redund->rbuf_j; 2134 rbuf_a = redund->rbuf_a; 2135 } 2136 2137 if (reuse == MAT_INITIAL_MATRIX){ 2138 PetscMPIInt subrank,subsize; 2139 PetscInt nleftover,np_subcomm; 2140 /* get the destination processors' id send_rank, nsends and nrecvs */ 2141 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2142 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2143 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2144 np_subcomm = size/nsubcomm; 2145 nleftover = size - nsubcomm*np_subcomm; 2146 nsends = 0; nrecvs = 0; 2147 for (i=0; i<size; i++){ /* i=rank*/ 2148 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2149 send_rank[nsends] = i; nsends++; 2150 recv_rank[nrecvs++] = i; 2151 } 2152 } 2153 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2154 i = size-nleftover-1; 2155 j = 0; 2156 while (j < nsubcomm - nleftover){ 2157 send_rank[nsends++] = i; 2158 i--; j++; 2159 } 2160 } 2161 2162 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2163 for (i=0; i<nleftover; i++){ 2164 recv_rank[nrecvs++] = size-nleftover+i; 2165 } 2166 } 2167 2168 /* allocate sbuf_j, sbuf_a */ 2169 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2170 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2171 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2172 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2173 2174 /* copy mat's local entries into the buffers */ 2175 if (reuse == MAT_INITIAL_MATRIX){ 2176 rownz_max = 0; 2177 rptr = sbuf_j; 2178 cols = sbuf_j + rend-rstart + 1; 2179 vals = sbuf_a; 2180 rptr[0] = 0; 2181 for (i=0; i<rend-rstart; i++){ 2182 row = i + rstart; 2183 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2184 ncols = nzA + nzB; 2185 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2186 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2187 /* load the column indices for this row into cols */ 2188 lwrite = 0; 2189 for (l=0; l<nzB; l++) { 2190 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2191 vals[lwrite] = aworkB[l]; 2192 cols[lwrite++] = ctmp; 2193 } 2194 } 2195 for (l=0; l<nzA; l++){ 2196 vals[lwrite] = aworkA[l]; 2197 cols[lwrite++] = cstart + cworkA[l]; 2198 } 2199 for (l=0; l<nzB; l++) { 2200 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2201 vals[lwrite] = aworkB[l]; 2202 cols[lwrite++] = ctmp; 2203 } 2204 } 2205 vals += ncols; 2206 cols += ncols; 2207 rptr[i+1] = rptr[i] + ncols; 2208 if (rownz_max < ncols) rownz_max = ncols; 2209 } 2210 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2211 } else { /* only copy matrix values into sbuf_a */ 2212 rptr = sbuf_j; 2213 vals = sbuf_a; 2214 rptr[0] = 0; 2215 for (i=0; i<rend-rstart; i++){ 2216 row = i + rstart; 2217 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2218 ncols = nzA + nzB; 2219 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2220 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2221 lwrite = 0; 2222 for (l=0; l<nzB; l++) { 2223 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2224 } 2225 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2226 for 
(l=0; l<nzB; l++) { 2227 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2228 } 2229 vals += ncols; 2230 rptr[i+1] = rptr[i] + ncols; 2231 } 2232 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2233 2234 /* send nzlocal to others, and recv others' nzlocal */ 2235 /*--------------------------------------------------*/ 2236 if (reuse == MAT_INITIAL_MATRIX){ 2237 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2238 s_waits2 = s_waits3 + nsends; 2239 s_waits1 = s_waits2 + nsends; 2240 r_waits1 = s_waits1 + nsends; 2241 r_waits2 = r_waits1 + nrecvs; 2242 r_waits3 = r_waits2 + nrecvs; 2243 } else { 2244 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2245 r_waits3 = s_waits3 + nsends; 2246 } 2247 2248 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2249 if (reuse == MAT_INITIAL_MATRIX){ 2250 /* get new tags to keep the communication clean */ 2251 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2252 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2253 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2254 2255 /* post receives of others' nzlocal */ 2256 for (i=0; i<nrecvs; i++){ 2257 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2258 } 2259 /* send nzlocal to others */ 2260 for (i=0; i<nsends; i++){ 2261 sbuf_nz[i] = nzlocal; 2262 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2263 } 2264 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2265 count = nrecvs; 2266 while (count) { 2267 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2268 recv_rank[imdex] = recv_status.MPI_SOURCE; 2269 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2270 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2271 2272 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2273 rbuf_nz[imdex] += i + 2; 2274 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2275 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2276 count--; 2277 } 2278 /* wait on sends of nzlocal */ 2279 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2280 /* send mat->i,j to others, and recv from others */ 2281 /*------------------------------------------------*/ 2282 for (i=0; i<nsends; i++){ 2283 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2284 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2285 } 2286 /* wait on receives of mat->i,j */ 2287 /*------------------------------*/ 2288 count = nrecvs; 2289 while (count) { 2290 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2291 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2292 count--; 2293 } 2294 /* wait on sends of mat->i,j */ 2295 /*---------------------------*/ 2296 if (nsends) { 2297 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2298 } 2299 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2300 2301 /* post receives, send and receive mat->a */ 2302
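/*
   Communication pattern recap (derived from the code above, stated here for
   orientation): tag1 exchanges the nonzero counts (nzlocal), tag2 the row
   pointers and column indices packed in sbuf_j, and tag3 below the numerical
   values in sbuf_a. On MAT_REUSE_MATRIX only the tag3 phase runs, since the
   symbolic data is cached in the Mat_Redundant struct.
*/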
/*----------------------------------------*/ 2303 for (imdex=0; imdex<nrecvs; imdex++) { 2304 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2305 } 2306 for (i=0; i<nsends; i++){ 2307 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2308 } 2309 count = nrecvs; 2310 while (count) { 2311 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2312 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2313 count--; 2314 } 2315 if (nsends) { 2316 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2317 } 2318 2319 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2320 2321 /* create redundant matrix */ 2322 /*-------------------------*/ 2323 if (reuse == MAT_INITIAL_MATRIX){ 2324 /* compute rownz_max for preallocation */ 2325 for (imdex=0; imdex<nrecvs; imdex++){ 2326 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2327 rptr = rbuf_j[imdex]; 2328 for (i=0; i<j; i++){ 2329 ncols = rptr[i+1] - rptr[i]; 2330 if (rownz_max < ncols) rownz_max = ncols; 2331 } 2332 } 2333 2334 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2335 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2336 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2337 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2338 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2339 } else { 2340 C = *matredundant; 2341 } 2342 2343 /* insert local matrix entries */ 2344 rptr = sbuf_j; 2345 cols = sbuf_j + rend-rstart + 1; 2346 vals = sbuf_a; 2347 for (i=0; i<rend-rstart; i++){ 2348 row = i + rstart; 2349 ncols = rptr[i+1] - rptr[i]; 2350 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2351 vals += ncols; 2352 cols += ncols; 2353 } 2354 /* insert received matrix entries */ 2355 for (imdex=0; imdex<nrecvs; imdex++){ 2356 rstart = rowrange[recv_rank[imdex]]; 2357 rend = rowrange[recv_rank[imdex]+1]; 2358 rptr = rbuf_j[imdex]; 2359 cols = rbuf_j[imdex] + rend-rstart + 1; 2360 vals = rbuf_a[imdex]; 2361 for (i=0; i<rend-rstart; i++){ 2362 row = i + rstart; 2363 ncols = rptr[i+1] - rptr[i]; 2364 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2365 vals += ncols; 2366 cols += ncols; 2367 } 2368 } 2369 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2370 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2371 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2372 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2373 if (reuse == MAT_INITIAL_MATRIX){ 2374 PetscContainer container; 2375 *matredundant = C; 2376 /* create a supporting struct and attach it to C for reuse */ 2377 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2378 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2379 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2380 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2381 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2382 2383 redund->nzlocal = nzlocal; 2384 redund->nsends = nsends; 2385 redund->nrecvs = nrecvs; 2386 redund->send_rank = send_rank; 2387 redund->recv_rank = recv_rank; 
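/* the buffers stashed in this block are exactly what the MAT_REUSE_MATRIX
   branch at the top of this routine picks up, letting later calls skip the
   tag1/tag2 symbolic phases */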
2388 redund->sbuf_nz = sbuf_nz; 2389 redund->rbuf_nz = rbuf_nz; 2390 redund->sbuf_j = sbuf_j; 2391 redund->sbuf_a = sbuf_a; 2392 redund->rbuf_j = rbuf_j; 2393 redund->rbuf_a = rbuf_a; 2394 2395 redund->MatDestroy = C->ops->destroy; 2396 C->ops->destroy = MatDestroy_MatRedundant; 2397 } 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2404 { 2405 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2406 PetscErrorCode ierr; 2407 PetscInt i,*idxb = 0; 2408 PetscScalar *va,*vb; 2409 Vec vtmp; 2410 2411 PetscFunctionBegin; 2412 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2413 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2414 if (idx) { 2415 for (i=0; i<A->rmap->n; i++) { 2416 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2417 } 2418 } 2419 2420 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2421 if (idx) { 2422 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2423 } 2424 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2425 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2426 2427 for (i=0; i<A->rmap->n; i++){ 2428 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2429 va[i] = vb[i]; 2430 if (idx) idx[i] = a->garray[idxb[i]]; 2431 } 2432 } 2433 2434 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2435 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2436 if (idxb) { 2437 ierr = PetscFree(idxb);CHKERRQ(ierr); 2438 } 2439 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 #undef __FUNCT__ 2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2446 { 2447 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2448 PetscErrorCode ierr; 2449 PetscInt i,*idxb = 0; 2450 PetscScalar *va,*vb; 2451 Vec vtmp; 2452 2453 PetscFunctionBegin; 2454 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2455 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2456 if (idx) { 2457 for (i=0; i<A->rmap->n; i++) { 2458 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2459 } 2460 } 2461 2462 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2463 if (idx) { 2464 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2465 } 2466 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2467 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2468 2469 for (i=0; i<A->rmap->n; i++){ 2470 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2471 va[i] = vb[i]; 2472 if (idx) idx[i] = a->garray[idxb[i]]; 2473 } 2474 } 2475 2476 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2477 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2478 if (idxb) { 2479 ierr = PetscFree(idxb);CHKERRQ(ierr); 2480 } 2481 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2488 { 2489 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2490 PetscInt n = A->rmap->n; 2491 PetscInt cstart = A->cmap->rstart; 2492 PetscInt *cmap = mat->garray; 2493 PetscInt *diagIdx, *offdiagIdx; 2494 Vec diagV, offdiagV; 2495 PetscScalar *a, *diagA, *offdiagA; 2496 PetscInt r; 2497 PetscErrorCode ierr; 2498 2499 PetscFunctionBegin; 2500 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2501 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2502 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2503 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2504 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2505 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2506 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2507 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2508 for(r = 0; r < n; ++r) { 2509 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2510 a[r] = diagA[r]; 2511 idx[r] = cstart + diagIdx[r]; 2512 } else { 2513 a[r] = offdiagA[r]; 2514 idx[r] = cmap[offdiagIdx[r]]; 2515 } 2516 } 2517 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2518 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2519 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2520 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2521 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2522 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2529 { 2530 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2531 PetscInt n = A->rmap->n; 2532 PetscInt cstart = A->cmap->rstart; 2533 PetscInt *cmap = mat->garray; 2534 PetscInt *diagIdx, *offdiagIdx; 2535 Vec diagV, offdiagV; 2536 PetscScalar *a, *diagA, *offdiagA; 2537 PetscInt r; 2538 PetscErrorCode ierr; 2539 2540 PetscFunctionBegin; 2541 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2542 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2543 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2544 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2545 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2546 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2547 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2548 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2549 for(r = 0; r < n; ++r) { 2550 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2551 a[r] = diagA[r]; 2552 idx[r] = cstart + diagIdx[r]; 2553 } else { 2554 a[r] = offdiagA[r]; 2555 idx[r] = cmap[offdiagIdx[r]]; 2556 } 2557 } 2558 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2559 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2560 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2561 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2562 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2563 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2564 PetscFunctionReturn(0); 2565 } 2566 2567 #undef __FUNCT__ 2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2570 { 2571 PetscErrorCode ierr; 2572 Mat *dummy; 2573 2574 PetscFunctionBegin; 2575 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2576 *newmat = *dummy; 2577 ierr = PetscFree(dummy);CHKERRQ(ierr); 2578 PetscFunctionReturn(0); 2579 } 2580 2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2582 /* -------------------------------------------------------------------*/ 2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2584 MatGetRow_MPIAIJ, 2585 MatRestoreRow_MPIAIJ, 2586 MatMult_MPIAIJ, 2587 /* 4*/ MatMultAdd_MPIAIJ, 2588 MatMultTranspose_MPIAIJ, 2589 MatMultTransposeAdd_MPIAIJ, 2590 #ifdef PETSC_HAVE_PBGL 2591 MatSolve_MPIAIJ, 2592 #else 2593 0, 2594 #endif 2595 0, 2596 0, 2597 /*10*/ 0, 2598 0, 2599 0, 2600
MatSOR_MPIAIJ, 2601 MatTranspose_MPIAIJ, 2602 /*15*/ MatGetInfo_MPIAIJ, 2603 MatEqual_MPIAIJ, 2604 MatGetDiagonal_MPIAIJ, 2605 MatDiagonalScale_MPIAIJ, 2606 MatNorm_MPIAIJ, 2607 /*20*/ MatAssemblyBegin_MPIAIJ, 2608 MatAssemblyEnd_MPIAIJ, 2609 MatSetOption_MPIAIJ, 2610 MatZeroEntries_MPIAIJ, 2611 /*24*/ MatZeroRows_MPIAIJ, 2612 0, 2613 #ifdef PETSC_HAVE_PBGL 2614 0, 2615 #else 2616 0, 2617 #endif 2618 0, 2619 0, 2620 /*29*/ MatSetUpPreallocation_MPIAIJ, 2621 #ifdef PETSC_HAVE_PBGL 2622 0, 2623 #else 2624 0, 2625 #endif 2626 0, 2627 0, 2628 0, 2629 /*34*/ MatDuplicate_MPIAIJ, 2630 0, 2631 0, 2632 0, 2633 0, 2634 /*39*/ MatAXPY_MPIAIJ, 2635 MatGetSubMatrices_MPIAIJ, 2636 MatIncreaseOverlap_MPIAIJ, 2637 MatGetValues_MPIAIJ, 2638 MatCopy_MPIAIJ, 2639 /*44*/ MatGetRowMax_MPIAIJ, 2640 MatScale_MPIAIJ, 2641 0, 2642 0, 2643 0, 2644 /*49*/ MatSetBlockSize_MPIAIJ, 2645 0, 2646 0, 2647 0, 2648 0, 2649 /*54*/ MatFDColoringCreate_MPIAIJ, 2650 0, 2651 MatSetUnfactored_MPIAIJ, 2652 MatPermute_MPIAIJ, 2653 0, 2654 /*59*/ MatGetSubMatrix_MPIAIJ, 2655 MatDestroy_MPIAIJ, 2656 MatView_MPIAIJ, 2657 0, 2658 0, 2659 /*64*/ 0, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2665 MatGetRowMinAbs_MPIAIJ, 2666 0, 2667 MatSetColoring_MPIAIJ, 2668 #if defined(PETSC_HAVE_ADIC) 2669 MatSetValuesAdic_MPIAIJ, 2670 #else 2671 0, 2672 #endif 2673 MatSetValuesAdifor_MPIAIJ, 2674 /*75*/ MatFDColoringApply_AIJ, 2675 0, 2676 0, 2677 0, 2678 0, 2679 /*80*/ 0, 2680 0, 2681 0, 2682 /*83*/ MatLoad_MPIAIJ, 2683 0, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2689 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2690 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2691 MatPtAP_Basic, 2692 MatPtAPSymbolic_MPIAIJ, 2693 /*94*/ MatPtAPNumeric_MPIAIJ, 2694 0, 2695 0, 2696 0, 2697 0, 2698 /*99*/ 0, 2699 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2700 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2701 MatConjugate_MPIAIJ, 2702 0, 2703 /*104*/MatSetValuesRow_MPIAIJ, 2704 MatRealPart_MPIAIJ, 2705 MatImaginaryPart_MPIAIJ, 2706 0, 2707 0, 2708 /*109*/0, 2709 MatGetRedundantMatrix_MPIAIJ, 2710 MatGetRowMin_MPIAIJ, 2711 0, 2712 0, 2713 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2714 0, 2715 0, 2716 0, 2717 0, 2718 /*119*/0, 2719 0, 2720 0, 2721 0 2722 }; 2723 2724 /* ----------------------------------------------------------------------------------------*/ 2725 2726 EXTERN_C_BEGIN 2727 #undef __FUNCT__ 2728 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2729 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2730 { 2731 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2732 PetscErrorCode ierr; 2733 2734 PetscFunctionBegin; 2735 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2736 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2737 PetscFunctionReturn(0); 2738 } 2739 EXTERN_C_END 2740 2741 EXTERN_C_BEGIN 2742 #undef __FUNCT__ 2743 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2744 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2745 { 2746 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2747 PetscErrorCode ierr; 2748 2749 PetscFunctionBegin; 2750 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2751 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2752 PetscFunctionReturn(0); 2753 } 2754 EXTERN_C_END 2755 2756 EXTERN_C_BEGIN 2757 #undef __FUNCT__ 2758 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2759 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2760 { 2761 Mat_MPIAIJ *b; 2762 PetscErrorCode ierr; 2763 PetscInt i; 2764 2765 PetscFunctionBegin; 
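/*
   d_nz/o_nz are uniform per-row estimates for the diagonal and off-diagonal
   blocks; d_nnz/o_nnz give exact per-row counts and take precedence when
   non-NULL. A hedged caller-side sketch (B is assumed created with
   MatSetType(B,MATMPIAIJ); the counts are illustrative only):

      ierr = MatMPIAIJSetPreallocation(B,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   The defaults substituted below (5 and 2) match that common guess.
*/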
2766 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2767 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2768 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2769 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2770 2771 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2772 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2773 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2774 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2775 if (d_nnz) { 2776 for (i=0; i<B->rmap->n; i++) { 2777 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2778 } 2779 } 2780 if (o_nnz) { 2781 for (i=0; i<B->rmap->n; i++) { 2782 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2783 } 2784 } 2785 b = (Mat_MPIAIJ*)B->data; 2786 2787 if (!B->preallocated) { 2788 /* Explicitly create 2 MATSEQAIJ matrices. */ 2789 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2790 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2791 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2792 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2793 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2794 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2795 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2796 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2797 } 2798 2799 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2800 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2801 B->preallocated = PETSC_TRUE; 2802 PetscFunctionReturn(0); 2803 } 2804 EXTERN_C_END 2805 2806 #undef __FUNCT__ 2807 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2808 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2809 { 2810 Mat mat; 2811 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2812 PetscErrorCode ierr; 2813 2814 PetscFunctionBegin; 2815 *newmat = 0; 2816 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2817 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2818 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2819 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2820 a = (Mat_MPIAIJ*)mat->data; 2821 2822 mat->factortype = matin->factortype; 2823 mat->rmap->bs = matin->rmap->bs; 2824 mat->assembled = PETSC_TRUE; 2825 mat->insertmode = NOT_SET_VALUES; 2826 mat->preallocated = PETSC_TRUE; 2827 2828 a->size = oldmat->size; 2829 a->rank = oldmat->rank; 2830 a->donotstash = oldmat->donotstash; 2831 a->roworiented = oldmat->roworiented; 2832 a->rowindices = 0; 2833 a->rowvalues = 0; 2834 a->getrowactive = PETSC_FALSE; 2835 2836 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2837 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2838 2839 if (oldmat->colmap) { 2840 #if defined (PETSC_USE_CTABLE) 2841 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2842 #else 2843 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2844 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2845 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2846 
#endif 2847 } else a->colmap = 0; 2848 if (oldmat->garray) { 2849 PetscInt len; 2850 len = oldmat->B->cmap->n; 2851 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2852 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2853 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2854 } else a->garray = 0; 2855 2856 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2857 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2858 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2859 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2860 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2861 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2862 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2863 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2864 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2865 *newmat = mat; 2866 PetscFunctionReturn(0); 2867 } 2868 2869 /* 2870 Allows sending/receiving larger messages than 2 gigabytes in a single call 2871 */ 2872 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm) 2873 { 2874 int ierr; 2875 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 2876 PetscInt i,numchunks; 2877 PetscMPIInt icnt; 2878 2879 numchunks = cnt/CHUNKSIZE + 1; 2880 for (i=0; i<numchunks; i++) { 2881 icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 2882 ierr = MPI_Send(mess,icnt,type,to,tag,comm); 2883 if (type == MPIU_INT) { 2884 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 2885 } else if (type == MPIU_SCALAR) { 2886 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 2887 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 2888 } 2889 return 0; 2890 } 2891 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm) 2892 { 2893 int ierr; 2894 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 2895 MPI_Status status; 2896 PetscInt i,numchunks; 2897 PetscMPIInt icnt; 2898 2899 numchunks = cnt/CHUNKSIZE + 1; 2900 for (i=0; i<numchunks; i++) { 2901 icnt = PetscMPIIntCast((i < numchunks-1) ?
CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 2902 ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status); 2903 if (type == MPIU_INT) { 2904 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 2905 } else if (type == MPIU_SCALAR) { 2906 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 2907 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 2908 } 2909 return 0; 2910 } 2911 2912 #undef __FUNCT__ 2913 #define __FUNCT__ "MatLoad_MPIAIJ" 2914 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2915 { 2916 PetscScalar *vals,*svals; 2917 MPI_Comm comm = ((PetscObject)viewer)->comm; 2918 PetscErrorCode ierr; 2919 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2920 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 2921 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2922 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2923 PetscInt cend,cstart,n,*rowners,sizesset=1; 2924 int fd; 2925 2926 PetscFunctionBegin; 2927 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2928 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2929 if (!rank) { 2930 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2931 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2932 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2933 } 2934 2935 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 2936 2937 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2938 M = header[1]; N = header[2]; 2939 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2940 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 2941 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 2942 2943 /* If global sizes are set, check if they are consistent with that given in the file */ 2944 if (sizesset) { 2945 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 2946 } 2947 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2948 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2949 2950 /* determine ownership of all rows */ 2951 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 2952 else m = newMat->rmap->n; /* Set by user */ 2953 2954 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2955 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2956 2957 /* First process needs enough room for process with most rows */ 2958 if (!rank) { 2959 mmax = rowners[1]; 2960 for (i=2; i<size; i++) { 2961 mmax = PetscMax(mmax,rowners[i]); 2962 } 2963 } else mmax = m; 2964 2965 rowners[0] = 0; 2966 for (i=2; i<=size; i++) { 2967 rowners[i] += rowners[i-1]; 2968 } 2969 rstart = rowners[rank]; 2970 rend = rowners[rank+1]; 2971 2972 /* distribute row lengths to all processors */ 2973 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2974 if (!rank) { 2975 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2976 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2977 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2978 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2979 for (j=0; 
j<m; j++) { 2980 procsnz[0] += ourlens[j]; 2981 } 2982 for (i=1; i<size; i++) { 2983 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2984 /* calculate the number of nonzeros on each processor */ 2985 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2986 procsnz[i] += rowlengths[j]; 2987 } 2988 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2989 } 2990 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2991 } else { 2992 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2993 } 2994 2995 if (!rank) { 2996 /* determine max buffer needed and allocate it */ 2997 maxnz = 0; 2998 for (i=0; i<size; i++) { 2999 maxnz = PetscMax(maxnz,procsnz[i]); 3000 } 3001 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3002 3003 /* read in my part of the matrix column indices */ 3004 nz = procsnz[0]; 3005 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3006 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3007 3008 /* read in everyone else's and ship off */ 3009 for (i=1; i<size; i++) { 3010 nz = procsnz[i]; 3011 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3012 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3013 } 3014 ierr = PetscFree(cols);CHKERRQ(ierr); 3015 } else { 3016 /* determine buffer space needed for message */ 3017 nz = 0; 3018 for (i=0; i<m; i++) { 3019 nz += ourlens[i]; 3020 } 3021 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3022 3023 /* receive message of column indices */ 3024 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3025 } 3026 3027 /* determine column ownership if matrix is not square */ 3028 if (N != M) { 3029 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3030 else n = newMat->cmap->n; 3031 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3032 cstart = cend - n; 3033 } else { 3034 cstart = rstart; 3035 cend = rend; 3036 n = cend - cstart; 3037 } 3038 3039 /* loop over local rows, determining number of off diagonal entries */ 3040 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3041 jj = 0; 3042 for (i=0; i<m; i++) { 3043 for (j=0; j<ourlens[i]; j++) { 3044 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3045 jj++; 3046 } 3047 } 3048 3049 for (i=0; i<m; i++) { 3050 ourlens[i] -= offlens[i]; 3051 } 3052 if (!sizesset) { 3053 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3054 } 3055 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3056 3057 for (i=0; i<m; i++) { 3058 ourlens[i] += offlens[i]; 3059 } 3060 3061 if (!rank) { 3062 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3063 3064 /* read in my part of the matrix numerical values */ 3065 nz = procsnz[0]; 3066 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3067 3068 /* insert into matrix */ 3069 jj = rstart; 3070 smycols = mycols; 3071 svals = vals; 3072 for (i=0; i<m; i++) { 3073 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3074 smycols += ourlens[i]; 3075 svals += ourlens[i]; 3076 jj++; 3077 } 3078 3079 /* read in other processors and ship out */ 3080 for (i=1; i<size; i++) { 3081 nz = procsnz[i]; 3082 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3083 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3084 } 3085 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3086 } else { 3087 /* receive numeric values */ 3088 ierr
= PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first each process
  extracts its piece as a local SeqAIJ matrix, and then the local pieces are
  concatenated into the final parallel result. Writing it directly would be
  much like MatGetSubMatrices_MPIAIJ().

    Note: This requires a sequential iscol with all indices.
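
    A typical call chain from user code is sketched below (isrow/iscol are
  parallel index sets supplied by the caller; the second call reuses the
  gathered column IS composed on the submatrix):

      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);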
3146 */ 3147 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3148 { 3149 PetscErrorCode ierr; 3150 PetscMPIInt rank,size; 3151 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3152 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3153 Mat *local,M,Mreuse; 3154 MatScalar *vwork,*aa; 3155 MPI_Comm comm = ((PetscObject)mat)->comm; 3156 Mat_SeqAIJ *aij; 3157 3158 3159 PetscFunctionBegin; 3160 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3161 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3162 3163 if (call == MAT_REUSE_MATRIX) { 3164 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3165 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3166 local = &Mreuse; 3167 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3168 } else { 3169 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3170 Mreuse = *local; 3171 ierr = PetscFree(local);CHKERRQ(ierr); 3172 } 3173 3174 /* 3175 m - number of local rows 3176 n - number of columns (same on all processors) 3177 rstart - first row in new global matrix generated 3178 */ 3179 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3180 if (call == MAT_INITIAL_MATRIX) { 3181 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3182 ii = aij->i; 3183 jj = aij->j; 3184 3185 /* 3186 Determine the number of non-zeros in the diagonal and off-diagonal 3187 portions of the matrix in order to do correct preallocation 3188 */ 3189 3190 /* first get start and end of "diagonal" columns */ 3191 if (csize == PETSC_DECIDE) { 3192 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3193 if (mglobal == n) { /* square matrix */ 3194 nlocal = m; 3195 } else { 3196 nlocal = n/size + ((n % size) > rank); 3197 } 3198 } else { 3199 nlocal = csize; 3200 } 3201 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3202 rstart = rend - nlocal; 3203 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3204 3205 /* next, compute all the lengths */ 3206 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3207 olens = dlens + m; 3208 for (i=0; i<m; i++) { 3209 jend = ii[i+1] - ii[i]; 3210 olen = 0; 3211 dlen = 0; 3212 for (j=0; j<jend; j++) { 3213 if (*jj < rstart || *jj >= rend) olen++; 3214 else dlen++; 3215 jj++; 3216 } 3217 olens[i] = olen; 3218 dlens[i] = dlen; 3219 } 3220 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3221 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3222 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3223 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3224 ierr = PetscFree(dlens);CHKERRQ(ierr); 3225 } else { 3226 PetscInt ml,nl; 3227 3228 M = *newmat; 3229 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3230 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3231 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3232 /* 3233 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3234 rather than the slower MatSetValues(). 
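  (Marking the matrix as was_assembled but not assembled lets the insertion
  path reuse the existing preallocated structure and column map of the
  diagonal and off-diagonal blocks; the MatAssemblyBegin/End() below then
  completes the usual final communication.)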
*/
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr   = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
   The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of v[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the entries of i are offsets into the local j array.

   The format used for the sparse matrix input is equivalent to a
   row-major ordering, i.e., for the following matrix the expected input data is
   as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i = {0,1,3}  [size = nrow+1 = 2+1]
        j = {0,0,2}  [size = nz = 3]
        v = {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i = {0,3}    [size = nrow+1 = 1+1]
        j = {0,1,2}  [size = nz = 3]
        v = {4,5,6}  [size = nz = 3]
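
   Example usage (a minimal sketch; m,n are this process's local sizes and
   i,j,v are its local CSR arrays laid out as above):
.vb
   Mat B;
   MatCreate(comm,&B);
   MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(B,MATMPIAIJ);
   MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve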

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero.
   See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was,
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
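
   A typical call sequence is sketched below (the d_nnz/o_nnz counts would be
   computed from the problem's connectivity):
.vb
   MatCreate(comm,&B);
   MatSetSizes(B,m,m,M,M);
   MatSetType(B,MATMPIAIJ);
   MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve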

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
   the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the entries of i are offsets into the local j array.

   The format used for the sparse matrix input is equivalent to a
   row-major ordering, i.e., for the following matrix the expected input data is
   as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i = {0,1,3}  [size = nrow+1 = 2+1]
        j = {0,0,2}  [size = nz = 3]
        v = {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i = {0,3}    [size = nrow+1 = 1+1]
        j = {0,1,2}  [size = nz = 3]
        v = {4,5,6}  [size = nz = 3]
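
   For instance, the matrix above could be built with the sketch below (each
   process passes the i, j, v arrays describing its own rows):
.vb
   Mat A;
   MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,v,&A);
.ve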

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
   MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A,m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
       rather than 0. Note that when calling MatSetValues(),
       the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows.
This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
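
   For the layout above, process 1 (which owns rows 3-5) would make the call
   sketched below (with d_nnz/o_nnz as in the last listing):
.vb
   PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
   Mat      A;
   MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
   /* then MatSetValues(), MatAssemblyBegin/End() as usual */
.ve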
3774 3775 Level: intermediate 3776 3777 .keywords: matrix, aij, compressed row, sparse, parallel 3778 3779 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3780 MPIAIJ, MatCreateMPIAIJWithArrays() 3781 @*/ 3782 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3783 { 3784 PetscErrorCode ierr; 3785 PetscMPIInt size; 3786 3787 PetscFunctionBegin; 3788 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3789 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3790 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3791 if (size > 1) { 3792 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3793 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3794 } else { 3795 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3796 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3797 } 3798 PetscFunctionReturn(0); 3799 } 3800 3801 #undef __FUNCT__ 3802 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3803 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3804 { 3805 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3806 3807 PetscFunctionBegin; 3808 *Ad = a->A; 3809 *Ao = a->B; 3810 *colmap = a->garray; 3811 PetscFunctionReturn(0); 3812 } 3813 3814 #undef __FUNCT__ 3815 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3816 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3817 { 3818 PetscErrorCode ierr; 3819 PetscInt i; 3820 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3821 3822 PetscFunctionBegin; 3823 if (coloring->ctype == IS_COLORING_GLOBAL) { 3824 ISColoringValue *allcolors,*colors; 3825 ISColoring ocoloring; 3826 3827 /* set coloring for diagonal portion */ 3828 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3829 3830 /* set coloring for off-diagonal portion */ 3831 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3832 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3833 for (i=0; i<a->B->cmap->n; i++) { 3834 colors[i] = allcolors[a->garray[i]]; 3835 } 3836 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3837 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3838 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3839 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3840 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3841 ISColoringValue *colors; 3842 PetscInt *larray; 3843 ISColoring ocoloring; 3844 3845 /* set coloring for diagonal portion */ 3846 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3847 for (i=0; i<a->A->cmap->n; i++) { 3848 larray[i] = i + A->cmap->rstart; 3849 } 3850 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3851 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3852 for (i=0; i<a->A->cmap->n; i++) { 3853 colors[i] = coloring->colors[larray[i]]; 3854 } 3855 ierr = PetscFree(larray);CHKERRQ(ierr); 3856 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3857 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3858 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3859 3860 /* set coloring for off-diagonal portion */ 3861 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
   MatMerge - Creates a single large PETSc matrix by concatenating sequential
              matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
.  inmat - the input sequential matrix (each process provides its own)
.  n - number of local columns (or PETSC_DECIDE)
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  outmat - the parallel matrix generated

   Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
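
   Example usage (a sketch; seqA is this process's assembled SeqAIJ block of rows;
   note that the implementation destroys the input matrix once it has been merged):
.vb
   Mat seqA,mpiA;
   /* ... create and assemble seqA on each process ... */
   MatMerge(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);
.ve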
3926 3927 @*/ 3928 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3929 { 3930 PetscErrorCode ierr; 3931 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3932 PetscInt *indx; 3933 PetscScalar *values; 3934 3935 PetscFunctionBegin; 3936 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3937 if (scall == MAT_INITIAL_MATRIX){ 3938 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3939 if (n == PETSC_DECIDE){ 3940 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3941 } 3942 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3943 rstart -= m; 3944 3945 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3946 for (i=0;i<m;i++) { 3947 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3948 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3949 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3950 } 3951 /* This routine will ONLY return MPIAIJ type matrix */ 3952 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3953 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3954 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3955 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3956 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3957 3958 } else if (scall == MAT_REUSE_MATRIX){ 3959 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3960 } else { 3961 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3962 } 3963 3964 for (i=0;i<m;i++) { 3965 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3966 Ii = i + rstart; 3967 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3968 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3969 } 3970 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3971 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3972 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3973 3974 PetscFunctionReturn(0); 3975 } 3976 3977 #undef __FUNCT__ 3978 #define __FUNCT__ "MatFileSplit" 3979 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3980 { 3981 PetscErrorCode ierr; 3982 PetscMPIInt rank; 3983 PetscInt m,N,i,rstart,nnz; 3984 size_t len; 3985 const PetscInt *indx; 3986 PetscViewer out; 3987 char *name; 3988 Mat B; 3989 const PetscScalar *values; 3990 3991 PetscFunctionBegin; 3992 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3993 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3994 /* Should this be the type of the diagonal block of A? 
*/ 3995 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3996 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3997 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3998 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3999 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4000 for (i=0;i<m;i++) { 4001 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4002 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4003 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4004 } 4005 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4006 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4007 4008 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4009 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4010 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4011 sprintf(name,"%s.%d",outfile,rank); 4012 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4013 ierr = PetscFree(name); 4014 ierr = MatView(B,out);CHKERRQ(ierr); 4015 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 4016 ierr = MatDestroy(B);CHKERRQ(ierr); 4017 PetscFunctionReturn(0); 4018 } 4019 4020 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 4021 #undef __FUNCT__ 4022 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4023 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4024 { 4025 PetscErrorCode ierr; 4026 Mat_Merge_SeqsToMPI *merge; 4027 PetscContainer container; 4028 4029 PetscFunctionBegin; 4030 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4031 if (container) { 4032 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4033 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4034 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4035 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4036 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4037 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4038 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4039 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4040 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4041 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4042 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4043 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4044 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4045 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4046 4047 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4048 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4049 } 4050 ierr = PetscFree(merge);CHKERRQ(ierr); 4051 4052 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4053 PetscFunctionReturn(0); 4054 } 4055 4056 #include "../src/mat/utils/freespace.h" 4057 #include "petscbt.h" 4058 4059 #undef __FUNCT__ 4060 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4061 /*@C 4062 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4063 matrices from each processor 4064 4065 Collective on MPI_Comm 4066 4067 Input Parameters: 4068 + comm - the communicators the parallel matrix will live on 4069 . seqmat - the input sequential matrices 4070 . m - number of local rows (or PETSC_DECIDE) 4071 . n - number of local columns (or PETSC_DECIDE) 4072 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4073 4074 Output Parameter: 4075 . mpimat - the parallel matrix generated 4076 4077 Level: advanced 4078 4079 Notes: 4080 The dimensions of the sequential matrix in each processor MUST be the same. 
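
   The symbolic/numeric split allows the parallel structure to be reused when only
   the numerical values change; MatMerge_SeqsToMPI() below implements this pattern
   (a sketch):
.vb
   MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);  /* once, for MAT_INITIAL_MATRIX */
   MatMerge_SeqsToMPINumeric(seqmat,mpimat);             /* whenever the values change */
.ve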
4081 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4082 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4083 @*/ 4084 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4085 { 4086 PetscErrorCode ierr; 4087 MPI_Comm comm=((PetscObject)mpimat)->comm; 4088 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4089 PetscMPIInt size,rank,taga,*len_s; 4090 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4091 PetscInt proc,m; 4092 PetscInt **buf_ri,**buf_rj; 4093 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4094 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4095 MPI_Request *s_waits,*r_waits; 4096 MPI_Status *status; 4097 MatScalar *aa=a->a; 4098 MatScalar **abuf_r,*ba_i; 4099 Mat_Merge_SeqsToMPI *merge; 4100 PetscContainer container; 4101 4102 PetscFunctionBegin; 4103 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4104 4105 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4106 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4107 4108 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4109 if (container) { 4110 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4111 } 4112 bi = merge->bi; 4113 bj = merge->bj; 4114 buf_ri = merge->buf_ri; 4115 buf_rj = merge->buf_rj; 4116 4117 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4118 owners = merge->rowmap->range; 4119 len_s = merge->len_s; 4120 4121 /* send and recv matrix values */ 4122 /*-----------------------------*/ 4123 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4124 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4125 4126 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4127 for (proc=0,k=0; proc<size; proc++){ 4128 if (!len_s[proc]) continue; 4129 i = owners[proc]; 4130 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4131 k++; 4132 } 4133 4134 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4135 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4136 ierr = PetscFree(status);CHKERRQ(ierr); 4137 4138 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4139 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4140 4141 /* insert mat values of mpimat */ 4142 /*----------------------------*/ 4143 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4144 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4145 4146 for (k=0; k<merge->nrecv; k++){ 4147 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4148 nrows = *(buf_ri_k[k]); 4149 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4150 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4151 } 4152 4153 /* set values of ba */ 4154 m = merge->rowmap->n; 4155 for (i=0; i<m; i++) { 4156 arow = owners[rank] + i; 4157 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4158 bnzi = bi[i+1] - bi[i]; 4159 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4160 4161 /* add local non-zero vals of this proc's seqmat into ba */ 4162 anzi = ai[arow+1] - ai[arow]; 4163 aj = a->j + ai[arow]; 4164 aa = a->a + ai[arow]; 4165 nextaj = 0; 4166 for (j=0; nextaj<anzi; j++){ 4167 if (*(bj_i 
+ j) == aj[nextaj]){ /* bcol == acol */ 4168 ba_i[j] += aa[nextaj++]; 4169 } 4170 } 4171 4172 /* add received vals into ba */ 4173 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4174 /* i-th row */ 4175 if (i == *nextrow[k]) { 4176 anzi = *(nextai[k]+1) - *nextai[k]; 4177 aj = buf_rj[k] + *(nextai[k]); 4178 aa = abuf_r[k] + *(nextai[k]); 4179 nextaj = 0; 4180 for (j=0; nextaj<anzi; j++){ 4181 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4182 ba_i[j] += aa[nextaj++]; 4183 } 4184 } 4185 nextrow[k]++; nextai[k]++; 4186 } 4187 } 4188 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4189 } 4190 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4191 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4192 4193 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4194 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4195 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4196 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4197 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4198 PetscFunctionReturn(0); 4199 } 4200 4201 #undef __FUNCT__ 4202 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4203 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4204 { 4205 PetscErrorCode ierr; 4206 Mat B_mpi; 4207 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4208 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4209 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4210 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4211 PetscInt len,proc,*dnz,*onz; 4212 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4213 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4214 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4215 MPI_Status *status; 4216 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4217 PetscBT lnkbt; 4218 Mat_Merge_SeqsToMPI *merge; 4219 PetscContainer container; 4220 4221 PetscFunctionBegin; 4222 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4223 4224 /* make sure it is a PETSc comm */ 4225 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4226 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4227 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4228 4229 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4230 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4231 4232 /* determine row ownership */ 4233 /*---------------------------------------------------------*/ 4234 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4235 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4236 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4237 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4238 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4239 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4240 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4241 4242 m = merge->rowmap->n; 4243 M = merge->rowmap->N; 4244 owners = merge->rowmap->range; 4245 4246 /* determine the number of messages to send, their lengths */ 4247 /*---------------------------------------------------------*/ 4248 len_s = merge->len_s; 4249 4250 len = 0; /* length of buf_si[] */ 4251 merge->nsend = 0; 4252 for (proc=0; proc<size; proc++){ 4253 len_si[proc] = 0; 4254 if (proc == rank){ 4255 len_s[proc] = 0; 4256 } else { 4257 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4258 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4259 } 4260 if (len_s[proc]) { 4261 merge->nsend++; 4262 nrows = 0; 4263 for (i=owners[proc]; i<owners[proc+1]; i++){ 4264 if (ai[i+1] > ai[i]) nrows++; 4265 } 4266 len_si[proc] = 2*(nrows+1); 4267 len += len_si[proc]; 4268 } 4269 } 4270 4271 /* determine the number and length of messages to receive for ij-structure */ 4272 /*-------------------------------------------------------------------------*/ 4273 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4274 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4275 4276 /* post the Irecv of j-structure */ 4277 /*-------------------------------*/ 4278 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4279 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4280 4281 /* post the Isend of j-structure */ 4282 /*--------------------------------*/ 4283 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4284 4285 for (proc=0, k=0; proc<size; proc++){ 4286 if (!len_s[proc]) continue; 4287 i = owners[proc]; 4288 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4289 k++; 4290 } 4291 4292 /* receives and sends of j-structure are complete */ 4293 /*------------------------------------------------*/ 4294 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4295 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4296 4297 /* send and recv i-structure */ 4298 /*---------------------------*/ 4299 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4300 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4301 4302 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4303 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4304 for (proc=0,k=0; proc<size; proc++){ 4305 if (!len_s[proc]) continue; 4306 /* form outgoing message for i-structure: 4307 buf_si[0]: nrows to be sent 4308 [1:nrows]: row index (global) 4309 [nrows+1:2*nrows+1]: i-structure index 4310 */ 4311 /*-------------------------------------------*/ 4312 nrows = len_si[proc]/2 - 1; 4313 buf_si_i = buf_si + nrows+1; 4314 buf_si[0] = nrows; 4315 buf_si_i[0] = 0; 4316 nrows = 0; 4317 for (i=owners[proc]; i<owners[proc+1]; i++){ 4318 anzi = ai[i+1] - ai[i]; 4319 if (anzi) { 4320 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4321 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4322 nrows++; 4323 } 4324 } 4325 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4326 k++; 4327 buf_si += len_si[proc]; 4328 } 4329 4330 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4331 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4332 4333 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4334 for (i=0; i<merge->nrecv; i++){ 4335 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4336 } 4337 4338 ierr = PetscFree(len_si);CHKERRQ(ierr); 4339 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4340 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4341 ierr = 
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4342 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4343 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4344 ierr = PetscFree(status);CHKERRQ(ierr); 4345 4346 /* compute a local seq matrix in each processor */ 4347 /*----------------------------------------------*/ 4348 /* allocate bi array and free space for accumulating nonzero column info */ 4349 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4350 bi[0] = 0; 4351 4352 /* create and initialize a linked list */ 4353 nlnk = N+1; 4354 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4355 4356 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4357 len = 0; 4358 len = ai[owners[rank+1]] - ai[owners[rank]]; 4359 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4360 current_space = free_space; 4361 4362 /* determine symbolic info for each local row */ 4363 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4364 4365 for (k=0; k<merge->nrecv; k++){ 4366 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4367 nrows = *buf_ri_k[k]; 4368 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4369 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4370 } 4371 4372 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4373 len = 0; 4374 for (i=0;i<m;i++) { 4375 bnzi = 0; 4376 /* add local non-zero cols of this proc's seqmat into lnk */ 4377 arow = owners[rank] + i; 4378 anzi = ai[arow+1] - ai[arow]; 4379 aj = a->j + ai[arow]; 4380 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4381 bnzi += nlnk; 4382 /* add received col data into lnk */ 4383 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4384 if (i == *nextrow[k]) { /* i-th row */ 4385 anzi = *(nextai[k]+1) - *nextai[k]; 4386 aj = buf_rj[k] + *nextai[k]; 4387 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4388 bnzi += nlnk; 4389 nextrow[k]++; nextai[k]++; 4390 } 4391 } 4392 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4393 4394 /* if free space is not available, make more free space */ 4395 if (current_space->local_remaining<bnzi) { 4396 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4397 nspacedouble++; 4398 } 4399 /* copy data into free space, then initialize lnk */ 4400 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4401 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4402 4403 current_space->array += bnzi; 4404 current_space->local_used += bnzi; 4405 current_space->local_remaining -= bnzi; 4406 4407 bi[i+1] = bi[i] + bnzi; 4408 } 4409 4410 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4411 4412 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4413 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4414 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4415 4416 /* create symbolic parallel matrix B_mpi */ 4417 /*---------------------------------------*/ 4418 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4419 if (n==PETSC_DECIDE) { 4420 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4421 } else { 4422 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4423 } 4424 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4425 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4426 ierr = 
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4427 4428 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4429 B_mpi->assembled = PETSC_FALSE; 4430 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4431 merge->bi = bi; 4432 merge->bj = bj; 4433 merge->buf_ri = buf_ri; 4434 merge->buf_rj = buf_rj; 4435 merge->coi = PETSC_NULL; 4436 merge->coj = PETSC_NULL; 4437 merge->owners_co = PETSC_NULL; 4438 4439 /* attach the supporting struct to B_mpi for reuse */ 4440 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4441 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4442 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4443 *mpimat = B_mpi; 4444 4445 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4446 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4447 PetscFunctionReturn(0); 4448 } 4449 4450 #undef __FUNCT__ 4451 #define __FUNCT__ "MatMerge_SeqsToMPI" 4452 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4453 { 4454 PetscErrorCode ierr; 4455 4456 PetscFunctionBegin; 4457 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4458 if (scall == MAT_INITIAL_MATRIX){ 4459 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4460 } 4461 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4462 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4463 PetscFunctionReturn(0); 4464 } 4465 4466 #undef __FUNCT__ 4467 #define __FUNCT__ "MatGetLocalMat" 4468 /*@ 4469 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4470 4471 Not Collective 4472 4473 Input Parameters: 4474 + A - the matrix 4475 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4476 4477 Output Parameter: 4478 . 
A_loc - the local sequential matrix generated 4479 4480 Level: developer 4481 4482 @*/ 4483 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4484 { 4485 PetscErrorCode ierr; 4486 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4487 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4488 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4489 MatScalar *aa=a->a,*ba=b->a,*cam; 4490 PetscScalar *ca; 4491 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4492 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4493 4494 PetscFunctionBegin; 4495 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4496 if (scall == MAT_INITIAL_MATRIX){ 4497 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4498 ci[0] = 0; 4499 for (i=0; i<am; i++){ 4500 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4501 } 4502 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4503 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4504 k = 0; 4505 for (i=0; i<am; i++) { 4506 ncols_o = bi[i+1] - bi[i]; 4507 ncols_d = ai[i+1] - ai[i]; 4508 /* off-diagonal portion of A */ 4509 for (jo=0; jo<ncols_o; jo++) { 4510 col = cmap[*bj]; 4511 if (col >= cstart) break; 4512 cj[k] = col; bj++; 4513 ca[k++] = *ba++; 4514 } 4515 /* diagonal portion of A */ 4516 for (j=0; j<ncols_d; j++) { 4517 cj[k] = cstart + *aj++; 4518 ca[k++] = *aa++; 4519 } 4520 /* off-diagonal portion of A */ 4521 for (j=jo; j<ncols_o; j++) { 4522 cj[k] = cmap[*bj++]; 4523 ca[k++] = *ba++; 4524 } 4525 } 4526 /* put together the new matrix */ 4527 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4528 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4529 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4530 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4531 mat->free_a = PETSC_TRUE; 4532 mat->free_ij = PETSC_TRUE; 4533 mat->nonew = 0; 4534 } else if (scall == MAT_REUSE_MATRIX){ 4535 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4536 ci = mat->i; cj = mat->j; cam = mat->a; 4537 for (i=0; i<am; i++) { 4538 /* off-diagonal portion of A */ 4539 ncols_o = bi[i+1] - bi[i]; 4540 for (jo=0; jo<ncols_o; jo++) { 4541 col = cmap[*bj]; 4542 if (col >= cstart) break; 4543 *cam++ = *ba++; bj++; 4544 } 4545 /* diagonal portion of A */ 4546 ncols_d = ai[i+1] - ai[i]; 4547 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4548 /* off-diagonal portion of A */ 4549 for (j=jo; j<ncols_o; j++) { 4550 *cam++ = *ba++; bj++; 4551 } 4552 } 4553 } else { 4554 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4555 } 4556 4557 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4558 PetscFunctionReturn(0); 4559 } 4560 4561 #undef __FUNCT__ 4562 #define __FUNCT__ "MatGetLocalMatCondensed" 4563 /*@C 4564 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4565 4566 Not Collective 4567 4568 Input Parameters: 4569 + A - the matrix 4570 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4571 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4572 4573 Output Parameter: 4574 . 

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A

    Collective on Mat

   Input Parameters:
+ A,B - the matrices in mpiaij format
. scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
- rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+ rowb, colb - index sets of rows and columns of B to extract
. brstart - the row of B_seq at which B's own local rows begin (the next B->rmap->n rows of B_seq are B's local rows)
- B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* rows below the local row range */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows above the local row range */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
    MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local part of A

    Collective on Mat

   Input Parameters:
+ A,B - the matrices in mpiaij format
. scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
. startsj - starting points of the outgoing j- and a-array messages, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
. startsj_r - starting points of the incoming j- and a-array messages, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
- bufa_ptr - buffer for sending matrix values, saved for MAT_REUSE_MATRIX (or PETSC_NULL)

   Output Parameter:
. B_oth - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=((PetscObject)ctx)->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* reused to hold the lengths of the rows to be received */
  svalues  = gen_to->values;   /* reused to hold the lengths of the rows to be sent */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices; /* local indices of the rows to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
    sstartsj[0] = 0; rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k   = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++){
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
          rowlen[j*sbs+l] = ncols;
          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr); /* the send buffer is not returned to the caller, so free it here */
    } else {
      *startsj   = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameter:
. A - The matrix in mpiaij format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing A*B directly is untenable

               n                       p                          p
        (              )         (                )         (                )
      m (      A       )  *    n (       B       )   =    m (       C       )
        (              )         (                )         (                )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
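
/*
   Illustrative usage sketch for the three routines above (not part of the
   library; the matrices named here are hypothetical). They are normally
   reached through the generic MatMatMult() interface when A has type
   MPIDENSE and B has type MPIAIJ:

       Mat A,B,C;
       ... assemble A (MPIDENSE, m x n) and B (MPIAIJ, n x p) ...
       ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
       ... change numerical values in A and/or B ...
       ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

   With MAT_REUSE_MATRIX the symbolic phase is skipped and only
   MatMatMultNumeric_MPIDense_MPIAIJ() is rerun on the existing C.
*/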

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off-processor entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix-vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                           "MatGetFactor_aij_mumps",
                                           MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                           "MatGetFactor_mpiaij_pastix",
                                           MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
                                           "MatConvert_MPIAIJ_MPIAIJPERM",
                                           MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
                                           "MatConvert_MPIAIJ_MPIAIJCRL",
                                           MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                           "MatConvert_MPIAIJ_MPISBAIJ",
                                           MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices, local to this process's "diagonal" block (i.e. in the range 0 to n-1)
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices, which must be global
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc (and neither are oi, oj, and oa).

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       You cannot later use MatSetValues() to change values in this matrix.
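
       Example usage - an illustrative sketch, not taken from a PETSc example;
       the 4x4 global matrix and its values below are made up. On the first of
       two processes, each owning 2 rows and 2 columns, one might pass

.vb
       PetscInt    i[3]  = {0,1,2};   row offsets of the "diagonal" block
       PetscInt    j[2]  = {0,1};     local column indices, 0 <= j[] < n
       PetscScalar a[2]  = {1.0,2.0};
       PetscInt    oi[3] = {0,1,1};   row offsets of the "off-diagonal" block
       PetscInt    oj[1] = {2};       global column indices
       PetscScalar oa[1] = {3.0};
       Mat         mat;

       ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                             i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve
       with the second process passing the analogous arrays for its own two rows.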

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat = *mmat;
  PetscInt       m = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  ierr = MatPreallocated(mat);CHKERRQ(ierr);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
    PetscInt   i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscTruth roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba = b->a;

    PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar  *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend){
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined (PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a; /* reload b->a BEFORE computing ap2: DisAssemble_MPIAIJ() replaced B's arrays */
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else {
        if (!aij->donotstash) {
          if (roworiented) {
            ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          } else {
            ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END
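
/*
   Illustrative sketch of how the direct entry point above is reached (an
   assumed calling sequence, not a tested example): a Fortran code that knows
   its matrix is of type MPIAIJ can call

       Mat            A
       PetscInt       row(1),col(2)
       PetscScalar    v(2)
       PetscErrorCode ierr
       ...
       call MatSetValuesMPIAIJ(A,1,row,2,col,v,ADD_VALUES,ierr)

   which bypasses the function-pointer dispatch of MatSetValues(). No type
   checking is performed, so the matrix really must be of type MPIAIJ.
*/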