#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
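/*
   A minimal usage sketch (hedged; gseq and the local row size m are
   hypothetical). Rank 0 holds an assembled MATSEQAIJ matrix gseq and every
   process receives its share of rows:

     Mat dist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);

   A second call with MAT_REUSE_MATRIX moves only the numerical values from
   rank 0; the nonzero structure created by the first call is kept:

     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/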
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable
  at a slightly higher hash-table cost; without it, it is not scalable
  (each process has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
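/*
   The lookup convention this routine establishes stores local index + 1,
   so that 0 can mean "global column not present in the off-diagonal part".
   A sketch of the lookup for a hypothetical global column gcol (this is
   exactly the pattern MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() use
   below); col ends up as the local index, or -1 if gcol is absent:

   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif
*/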
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
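/*
   The caller of MatSetValuesRow_MPIAIJ() must therefore supply the row in
   global column order, which the three copies above split as

     v = [ off-diagonal entries left of the diagonal block |
           diagonal-block entries                          |
           off-diagonal entries right of the diagonal block ]

   i.e. l values go to B, a->i[row+1]-a->i[row] values go to A, and the
   remaining b->i[row+1]-b->i[row]-l values go back to B.
*/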
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscTruth ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
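/*
   Rows owned by another process are only stashed above; nothing moves until
   assembly. A hedged sketch of the full cycle (grow, nc, gcols and vals are
   hypothetical):

     ierr = MatSetValues(mat,1,&grow,nc,gcols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MatAssemblyBegin_MPIAIJ() and MatAssemblyEnd_MPIAIJ() below ship the
   stash to the owning processes and apply it via MatSetValues_MPIAIJ().
*/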
extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processes are in the same (INSERT_VALUES or ADD_VALUES) mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
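/*
   The MPI_BOR reduce above is a small trick worth spelling out. Assuming
   the usual InsertMode encoding (NOT_SET_VALUES = 0, INSERT_VALUES = 1,
   ADD_VALUES = 2), a bitwise OR over all ranks yields

     all INSERT (or unset)    -> 1
     all ADD (or unset)       -> 2
     a mix of INSERT and ADD  -> 3 == (ADD_VALUES|INSERT_VALUES)

   so ranks that set no values at all are absorbed harmlessly, and a genuine
   INSERT/ADD conflict is detected with a single reduction.
*/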
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /*
     determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble.

     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix is
     square and the user wishes to set the diagonal, we use separate code
     so that MatSetValues() is not called once per diagonal entry; that
     would allocate new memory repeatedly, causing many mallocs and
     slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
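/*
   A hedged usage sketch: each process may list any global rows, owned or
   not; the routine above routes them to their owners before zeroing and
   optionally places diag on the diagonal. For example, to keep a unit
   diagonal in three (hypothetical) rows:

     PetscInt rows[3] = {0,5,17};
     ierr = MatZeroRows(A,3,rows,1.0);CHKERRQ(ierr);
*/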
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
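/*
   The pattern in MatMult_MPIAIJ()/MatMultAdd_MPIAIJ() overlaps
   communication with computation. With the matrix stored as the diagonal
   block A_d plus the off-diagonal block B (columns compressed via garray),

     y = A_d * x_local + B * x_ghost

   the ghost gather into lvec (VecScatterBegin/End) runs while the purely
   local product A_d * x_local is computed, and B's contribution is added
   once the ghost values have arrived.
*/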
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: this assumes the values are not actually added
       into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
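/*
   Transpose products reverse the communication direction: each process
   computes B^T * x into lvec and scatter-adds those partial sums back to
   the owning ranks, so that

     y = A_d^T * x_local + (scatter-reverse-add of B^T * x)

   VecScatterGetMerged() reports whether the communication already completes
   inside VecScatterBegin(); the code above orders the purely local A_d^T
   product so that it overlaps (or follows) the communication accordingly.
*/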
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
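/*
   The on-disk layout this routine produces is, in order:

     header[4]     MAT_FILE_CLASSID, M (global rows), N (global cols), total nz
     PetscInt[M]   number of nonzeros in each global row
     PetscInt[nz]  column indices, rows concatenated, each row in ascending
                   global column order
     Scalar[nz]    the numerical values in the same order

   The three per-row loops above exist precisely to merge the B (left of the
   diagonal block), A, and B (right of the diagonal block) pieces into that
   ascending column order.
*/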
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject)mat)->prefix,"-mat_ascii_output_large",&flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscTruth     hasop;

  PetscFunctionBegin;
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
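/*
   All the "local" SOR variants above are block-Jacobi in structure: each
   outer iteration gathers the ghost values of x, forms the locally owned
   right-hand side

     bb1 = bb - B * x_ghost

   and then runs the requested sequential SOR sweep on the diagonal block
   with bb1. Only the ghost gather communicates; the sweeps themselves are
   process-local, which is why a fully coupled parallel SOR is reported as
   unsupported.
*/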
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}
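/*
   A hedged usage sketch: the local A and B contributions are always summed
   first, and the flag only selects the cross-process reduction.

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"total nonzeros %g\n",info.nz_used);CHKERRQ(ierr);

   MAT_LOCAL returns this process's totals, MAT_GLOBAL_MAX the per-process
   maximum, and MAT_GLOBAL_SUM the sum over all processes.
*/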
switch (op) { 1420 case MAT_NEW_NONZERO_LOCATIONS: 1421 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1422 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1423 case MAT_KEEP_NONZERO_PATTERN: 1424 case MAT_NEW_NONZERO_LOCATION_ERR: 1425 case MAT_USE_INODES: 1426 case MAT_IGNORE_ZERO_ENTRIES: 1427 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1428 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1429 break; 1430 case MAT_ROW_ORIENTED: 1431 a->roworiented = flg; 1432 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1433 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1434 break; 1435 case MAT_NEW_DIAGONALS: 1436 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1437 break; 1438 case MAT_IGNORE_OFF_PROC_ENTRIES: 1439 a->donotstash = PETSC_TRUE; 1440 break; 1441 case MAT_SYMMETRIC: 1442 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1443 break; 1444 case MAT_STRUCTURALLY_SYMMETRIC: 1445 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1446 break; 1447 case MAT_HERMITIAN: 1448 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1449 break; 1450 case MAT_SYMMETRY_ETERNAL: 1451 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1452 break; 1453 default: 1454 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatGetRow_MPIAIJ" 1461 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1462 { 1463 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1464 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1465 PetscErrorCode ierr; 1466 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1467 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1468 PetscInt *cmap,*idx_p; 1469 1470 PetscFunctionBegin; 1471 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1472 mat->getrowactive = PETSC_TRUE; 1473 1474 if (!mat->rowvalues && (idx || v)) { 1475 /* 1476 allocate enough space to hold information from the longest row. 
1477 */ 1478 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1479 PetscInt max = 1,tmp; 1480 for (i=0; i<matin->rmap->n; i++) { 1481 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1482 if (max < tmp) { max = tmp; } 1483 } 1484 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1485 } 1486 1487 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1488 lrow = row - rstart; 1489 1490 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1491 if (!v) {pvA = 0; pvB = 0;} 1492 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1493 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1494 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1495 nztot = nzA + nzB; 1496 1497 cmap = mat->garray; 1498 if (v || idx) { 1499 if (nztot) { 1500 /* Sort by increasing column numbers, assuming A and B already sorted */ 1501 PetscInt imark = -1; 1502 if (v) { 1503 *v = v_p = mat->rowvalues; 1504 for (i=0; i<nzB; i++) { 1505 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1506 else break; 1507 } 1508 imark = i; 1509 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1510 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1511 } 1512 if (idx) { 1513 *idx = idx_p = mat->rowindices; 1514 if (imark > -1) { 1515 for (i=0; i<imark; i++) { 1516 idx_p[i] = cmap[cworkB[i]]; 1517 } 1518 } else { 1519 for (i=0; i<nzB; i++) { 1520 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1521 else break; 1522 } 1523 imark = i; 1524 } 1525 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1526 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1527 } 1528 } else { 1529 if (idx) *idx = 0; 1530 if (v) *v = 0; 1531 } 1532 } 1533 *nz = nztot; 1534 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1535 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1541 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1542 { 1543 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1544 1545 PetscFunctionBegin; 1546 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1547 aij->getrowactive = PETSC_FALSE; 1548 PetscFunctionReturn(0); 1549 } 1550 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "MatNorm_MPIAIJ" 1553 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1554 { 1555 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1556 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1557 PetscErrorCode ierr; 1558 PetscInt i,j,cstart = mat->cmap->rstart; 1559 PetscReal sum = 0.0; 1560 MatScalar *v; 1561 1562 PetscFunctionBegin; 1563 if (aij->size == 1) { 1564 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1565 } else { 1566 if (type == NORM_FROBENIUS) { 1567 v = amat->a; 1568 for (i=0; i<amat->nz; i++) { 1569 #if defined(PETSC_USE_COMPLEX) 1570 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1571 #else 1572 sum += (*v)*(*v); v++; 1573 #endif 1574 } 1575 v = bmat->a; 1576 for (i=0; i<bmat->nz; i++) { 1577 #if defined(PETSC_USE_COMPLEX) 1578 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1579 #else 1580 sum += (*v)*(*v); v++; 1581 #endif 1582 } 1583 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1584 *norm = sqrt(*norm); 1585 } else if (type 
== NORM_1) { /* max column norm */ 1586 PetscReal *tmp,*tmp2; 1587 PetscInt *jj,*garray = aij->garray; 1588 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1589 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1590 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1591 *norm = 0.0; 1592 v = amat->a; jj = amat->j; 1593 for (j=0; j<amat->nz; j++) { 1594 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1595 } 1596 v = bmat->a; jj = bmat->j; 1597 for (j=0; j<bmat->nz; j++) { 1598 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1599 } 1600 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1601 for (j=0; j<mat->cmap->N; j++) { 1602 if (tmp2[j] > *norm) *norm = tmp2[j]; 1603 } 1604 ierr = PetscFree(tmp);CHKERRQ(ierr); 1605 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1606 } else if (type == NORM_INFINITY) { /* max row norm */ 1607 PetscReal ntemp = 0.0; 1608 for (j=0; j<aij->A->rmap->n; j++) { 1609 v = amat->a + amat->i[j]; 1610 sum = 0.0; 1611 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1612 sum += PetscAbsScalar(*v); v++; 1613 } 1614 v = bmat->a + bmat->i[j]; 1615 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1616 sum += PetscAbsScalar(*v); v++; 1617 } 1618 if (sum > ntemp) ntemp = sum; 1619 } 1620 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1621 } else { 1622 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1623 } 1624 } 1625 PetscFunctionReturn(0); 1626 } 1627 1628 #undef __FUNCT__ 1629 #define __FUNCT__ "MatTranspose_MPIAIJ" 1630 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1631 { 1632 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1633 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1634 PetscErrorCode ierr; 1635 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1636 PetscInt cstart=A->cmap->rstart,ncol; 1637 Mat B; 1638 MatScalar *array; 1639 1640 PetscFunctionBegin; 1641 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1642 1643 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1644 ai = Aloc->i; aj = Aloc->j; 1645 bi = Bloc->i; bj = Bloc->j; 1646 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1647 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1648 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1649 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1650 for (i=0; i<ai[ma]; i++){ 1651 d_nnz[aj[i]] ++; 1652 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1653 } 1654 1655 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1656 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1657 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1658 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1659 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1660 } else { 1661 B = *matout; 1662 } 1663 1664 /* copy over the A part */ 1665 array = Aloc->a; 1666 row = A->rmap->rstart; 1667 for (i=0; i<ma; i++) { 1668 ncol = ai[i+1]-ai[i]; 1669 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1670 row++; array += ncol; aj += ncol; 1671 } 1672 aj = Aloc->j; 1673 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1674 1675 /* copy over the B part */ 1676 ierr = 
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1677 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1678 array = Bloc->a; 1679 row = A->rmap->rstart; 1680 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1681 cols_tmp = cols; 1682 for (i=0; i<mb; i++) { 1683 ncol = bi[i+1]-bi[i]; 1684 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1685 row++; array += ncol; cols_tmp += ncol; 1686 } 1687 ierr = PetscFree(cols);CHKERRQ(ierr); 1688 1689 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1690 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1691 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1692 *matout = B; 1693 } else { 1694 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1695 } 1696 PetscFunctionReturn(0); 1697 } 1698 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1701 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1702 { 1703 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1704 Mat a = aij->A,b = aij->B; 1705 PetscErrorCode ierr; 1706 PetscInt s1,s2,s3; 1707 1708 PetscFunctionBegin; 1709 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1710 if (rr) { 1711 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1712 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1713 /* Overlap communication with computation. */ 1714 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1715 } 1716 if (ll) { 1717 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1718 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1719 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1720 } 1721 /* scale the diagonal block */ 1722 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1723 1724 if (rr) { 1725 /* Do a scatter end and then right scale the off-diagonal block */ 1726 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1727 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1728 } 1729 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1735 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1736 { 1737 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1738 PetscErrorCode ierr; 1739 1740 PetscFunctionBegin; 1741 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1742 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1743 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1744 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1745 PetscFunctionReturn(0); 1746 } 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1749 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1750 { 1751 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1752 PetscErrorCode ierr; 1753 1754 PetscFunctionBegin; 1755 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1756 PetscFunctionReturn(0); 1757 } 1758 1759 #undef __FUNCT__ 1760 #define __FUNCT__ "MatEqual_MPIAIJ" 1761 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1762 { 1763 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1764 Mat a,b,c,d; 1765 PetscTruth flg; 1766 PetscErrorCode ierr; 1767 1768 PetscFunctionBegin; 1769 a = matA->A; b = matA->B; 1770 c = matB->A; d = matB->B; 1771 1772 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1773 if (flg) { 1774 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1775 } 1776 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1777 
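  /* The MPI_LAND reduction above means *flag is true only when every process
     finds both of its local blocks equal; a single differing entry on one
     rank makes the global answer PETSC_FALSE.  A minimal caller-side sketch
     (hypothetical matrices X and Y with identical parallel layouts), noting
     that MatEqual() is collective on the matrices' communicator:

         PetscTruth eq;
         ierr = MatEqual(X,Y,&eq);CHKERRQ(ierr);
         if (!eq) { ... matrices differ somewhere on some process ... }
  */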
PetscFunctionReturn(0); 1778 } 1779 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "MatCopy_MPIAIJ" 1782 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1783 { 1784 PetscErrorCode ierr; 1785 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1786 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1787 1788 PetscFunctionBegin; 1789 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1790 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1791 /* because of the column compression in the off-processor part of the matrix a->B, 1792 the number of columns in a->B and b->B may be different, hence we cannot call 1793 the MatCopy() directly on the two parts. If need be, we can provide a more 1794 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1795 then copying the submatrices */ 1796 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1797 } else { 1798 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1799 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1800 } 1801 PetscFunctionReturn(0); 1802 } 1803 1804 #undef __FUNCT__ 1805 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1806 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1807 { 1808 PetscErrorCode ierr; 1809 1810 PetscFunctionBegin; 1811 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1812 PetscFunctionReturn(0); 1813 } 1814 1815 #undef __FUNCT__ 1816 #define __FUNCT__ "MatAXPY_MPIAIJ" 1817 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1818 { 1819 PetscErrorCode ierr; 1820 PetscInt i; 1821 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1822 PetscBLASInt bnz,one=1; 1823 Mat_SeqAIJ *x,*y; 1824 1825 PetscFunctionBegin; 1826 if (str == SAME_NONZERO_PATTERN) { 1827 PetscScalar alpha = a; 1828 x = (Mat_SeqAIJ *)xx->A->data; 1829 y = (Mat_SeqAIJ *)yy->A->data; 1830 bnz = PetscBLASIntCast(x->nz); 1831 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1832 x = (Mat_SeqAIJ *)xx->B->data; 1833 y = (Mat_SeqAIJ *)yy->B->data; 1834 bnz = PetscBLASIntCast(x->nz); 1835 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1836 } else if (str == SUBSET_NONZERO_PATTERN) { 1837 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1838 1839 x = (Mat_SeqAIJ *)xx->B->data; 1840 y = (Mat_SeqAIJ *)yy->B->data; 1841 if (y->xtoy && y->XtoY != xx->B) { 1842 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1843 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1844 } 1845 if (!y->xtoy) { /* get xtoy */ 1846 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1847 y->XtoY = xx->B; 1848 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1849 } 1850 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1851 } else { 1852 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1853 } 1854 PetscFunctionReturn(0); 1855 } 1856 1857 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatConjugate_MPIAIJ" 1861 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1862 { 1863 #if defined(PETSC_USE_COMPLEX) 1864 PetscErrorCode ierr; 1865 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1866 1867 PetscFunctionBegin; 1868 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1869 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1870 #else 1871 PetscFunctionBegin; 1872 #endif 1873 PetscFunctionReturn(0); 1874 } 1875 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatRealPart_MPIAIJ" 1878 PetscErrorCode 
MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscTruth     row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ILU");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
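/*
   Note that the numeric factorization above is intentionally empty:
   ilu_permuted() already computes the level-0 factors during the symbolic
   phase, and the factored level graph reaches the solve below through the
   PetscContainer composed on the factor matrix under the key "graph".
   A sketch of that handoff, using the same calls the routines themselves use:

       PetscContainer c;
       ierr = PetscObjectQuery((PetscObject)fact,"graph",(PetscObject *)&c);CHKERRQ(ierr);
       ierr = PetscContainerGetPointer(c,(void **)&lgraph_p);CHKERRQ(ierr);
*/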
#define __FUNCT__ "MatSolve_MPIAIJ" 1972 /* 1973 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1974 */ 1975 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1976 { 1977 namespace graph_dist = boost::graph::distributed; 1978 1979 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1980 lgraph_type* lgraph_p; 1981 PetscContainer c; 1982 PetscErrorCode ierr; 1983 1984 PetscFunctionBegin; 1985 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1986 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1987 ierr = VecCopy(b, x);CHKERRQ(ierr); 1988 1989 PetscScalar* array_x; 1990 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1991 PetscInt sx; 1992 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1993 1994 PetscScalar* array_b; 1995 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1996 PetscInt sb; 1997 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1998 1999 lgraph_type& level_graph = *lgraph_p; 2000 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2001 2002 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2003 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2004 ref_x(array_x, boost::extents[num_vertices(graph)]); 2005 2006 typedef boost::iterator_property_map<array_ref_type::iterator, 2007 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2008 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2009 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2010 2011 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2012 2013 PetscFunctionReturn(0); 2014 } 2015 #endif 2016 2017 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2018 PetscInt nzlocal,nsends,nrecvs; 2019 PetscMPIInt *send_rank,*recv_rank; 2020 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2021 PetscScalar *sbuf_a,**rbuf_a; 2022 PetscErrorCode (*MatDestroy)(Mat); 2023 } Mat_Redundant; 2024 2025 #undef __FUNCT__ 2026 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2027 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2028 { 2029 PetscErrorCode ierr; 2030 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2031 PetscInt i; 2032 2033 PetscFunctionBegin; 2034 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2035 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2036 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2037 for (i=0; i<redund->nrecvs; i++){ 2038 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2039 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2040 } 2041 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2042 ierr = PetscFree(redund);CHKERRQ(ierr); 2043 PetscFunctionReturn(0); 2044 } 2045 2046 #undef __FUNCT__ 2047 #define __FUNCT__ "MatDestroy_MatRedundant" 2048 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2049 { 2050 PetscErrorCode ierr; 2051 PetscContainer container; 2052 Mat_Redundant *redund=PETSC_NULL; 2053 2054 PetscFunctionBegin; 2055 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2056 if (container) { 2057 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2058 } else { 2059 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2060 } 2061 A->ops->destroy = redund->MatDestroy; 2062 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2063 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2064 ierr = 
PetscContainerDestroy(container);CHKERRQ(ierr); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 #undef __FUNCT__ 2069 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2070 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2071 { 2072 PetscMPIInt rank,size; 2073 MPI_Comm comm=((PetscObject)mat)->comm; 2074 PetscErrorCode ierr; 2075 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2076 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2077 PetscInt *rowrange=mat->rmap->range; 2078 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2079 Mat A=aij->A,B=aij->B,C=*matredundant; 2080 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2081 PetscScalar *sbuf_a; 2082 PetscInt nzlocal=a->nz+b->nz; 2083 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2084 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2085 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2086 MatScalar *aworkA,*aworkB; 2087 PetscScalar *vals; 2088 PetscMPIInt tag1,tag2,tag3,imdex; 2089 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2090 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2091 MPI_Status recv_status,*send_status; 2092 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2093 PetscInt **rbuf_j=PETSC_NULL; 2094 PetscScalar **rbuf_a=PETSC_NULL; 2095 Mat_Redundant *redund=PETSC_NULL; 2096 PetscContainer container; 2097 2098 PetscFunctionBegin; 2099 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2100 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2101 2102 if (reuse == MAT_REUSE_MATRIX) { 2103 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2104 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2105 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2106 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2107 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2108 if (container) { 2109 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2110 } else { 2111 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2112 } 2113 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2114 2115 nsends = redund->nsends; 2116 nrecvs = redund->nrecvs; 2117 send_rank = redund->send_rank; 2118 recv_rank = redund->recv_rank; 2119 sbuf_nz = redund->sbuf_nz; 2120 rbuf_nz = redund->rbuf_nz; 2121 sbuf_j = redund->sbuf_j; 2122 sbuf_a = redund->sbuf_a; 2123 rbuf_j = redund->rbuf_j; 2124 rbuf_a = redund->rbuf_a; 2125 } 2126 2127 if (reuse == MAT_INITIAL_MATRIX){ 2128 PetscMPIInt subrank,subsize; 2129 PetscInt nleftover,np_subcomm; 2130 /* get the destination processors' id send_rank, nsends and nrecvs */ 2131 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2132 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2133 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2134 np_subcomm = size/nsubcomm; 2135 nleftover = size - nsubcomm*np_subcomm; 2136 nsends = 0; nrecvs = 0; 2137 for (i=0; i<size; i++){ /* i=rank*/ 2138 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2139 send_rank[nsends] = i; nsends++; 2140 recv_rank[nrecvs++] = i; 2141 } 2142 } 2143 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2144 i = size-nleftover-1; 2145 j = 0; 2146 while (j < nsubcomm - nleftover){ 2147 send_rank[nsends++] = i; 2148 i--; j++; 2149 } 2150 } 2151 2152 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2153 for (i=0; i<nleftover; i++){ 2154 recv_rank[nrecvs++] = size-nleftover+i; 2155 } 2156 } 2157 2158 /* allocate sbuf_j, sbuf_a */ 2159 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2160 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2161 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2162 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2163 2164 /* copy mat's local entries into the buffers */ 2165 if (reuse == MAT_INITIAL_MATRIX){ 2166 rownz_max = 0; 2167 rptr = sbuf_j; 2168 cols = sbuf_j + rend-rstart + 1; 2169 vals = sbuf_a; 2170 rptr[0] = 0; 2171 for (i=0; i<rend-rstart; i++){ 2172 row = i + rstart; 2173 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2174 ncols = nzA + nzB; 2175 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2176 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2177 /* load the column indices for this row into cols */ 2178 lwrite = 0; 2179 for (l=0; l<nzB; l++) { 2180 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2181 vals[lwrite] = aworkB[l]; 2182 cols[lwrite++] = ctmp; 2183 } 2184 } 2185 for (l=0; l<nzA; l++){ 2186 vals[lwrite] = aworkA[l]; 2187 cols[lwrite++] = cstart + cworkA[l]; 2188 } 2189 for (l=0; l<nzB; l++) { 2190 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2191 vals[lwrite] = aworkB[l]; 2192 cols[lwrite++] = ctmp; 2193 } 2194 } 2195 vals += ncols; 2196 cols += ncols; 2197 rptr[i+1] = rptr[i] + ncols; 2198 if (rownz_max < ncols) rownz_max = ncols; 2199 } 2200 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2201 } else { /* only copy matrix values into sbuf_a */ 2202 rptr = sbuf_j; 2203 vals = sbuf_a; 2204 rptr[0] = 0; 2205 for (i=0; i<rend-rstart; i++){ 2206 row = i + rstart; 2207 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2208 ncols = nzA + nzB; 2209 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2210 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2211 lwrite = 0; 2212 for (l=0; l<nzB; l++) { 2213 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2214 } 2215 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2216 for 
(l=0; l<nzB; l++) { 2217 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2218 } 2219 vals += ncols; 2220 rptr[i+1] = rptr[i] + ncols; 2221 } 2222 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2223 2224 /* send nzlocal to others, and recv other's nzlocal */ 2225 /*--------------------------------------------------*/ 2226 if (reuse == MAT_INITIAL_MATRIX){ 2227 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2228 s_waits2 = s_waits3 + nsends; 2229 s_waits1 = s_waits2 + nsends; 2230 r_waits1 = s_waits1 + nsends; 2231 r_waits2 = r_waits1 + nrecvs; 2232 r_waits3 = r_waits2 + nrecvs; 2233 } else { 2234 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2235 r_waits3 = s_waits3 + nsends; 2236 } 2237 2238 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2239 if (reuse == MAT_INITIAL_MATRIX){ 2240 /* get new tags to keep the communication clean */ 2241 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2242 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2243 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2244 2245 /* post receives of other's nzlocal */ 2246 for (i=0; i<nrecvs; i++){ 2247 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2248 } 2249 /* send nzlocal to others */ 2250 for (i=0; i<nsends; i++){ 2251 sbuf_nz[i] = nzlocal; 2252 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2253 } 2254 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2255 count = nrecvs; 2256 while (count) { 2257 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2258 recv_rank[imdex] = recv_status.MPI_SOURCE; 2259 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2260 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2261 2262 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2263 rbuf_nz[imdex] += i + 2; 2264 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2265 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2266 count--; 2267 } 2268 /* wait on sends of nzlocal */ 2269 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2270 /* send mat->i,j to others, and recv from other's */ 2271 /*------------------------------------------------*/ 2272 for (i=0; i<nsends; i++){ 2273 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2274 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2275 } 2276 /* wait on receives of mat->i,j */ 2277 /*------------------------------*/ 2278 count = nrecvs; 2279 while (count) { 2280 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2281 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2282 count--; 2283 } 2284 /* wait on sends of mat->i,j */ 2285 /*---------------------------*/ 2286 if (nsends) { 2287 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2288 } 2289 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2290 2291 /* post receives, send and receive mat->a */ 2292 
/*----------------------------------------*/ 2293 for (imdex=0; imdex<nrecvs; imdex++) { 2294 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2295 } 2296 for (i=0; i<nsends; i++){ 2297 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2298 } 2299 count = nrecvs; 2300 while (count) { 2301 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2302 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2303 count--; 2304 } 2305 if (nsends) { 2306 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2307 } 2308 2309 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2310 2311 /* create redundant matrix */ 2312 /*-------------------------*/ 2313 if (reuse == MAT_INITIAL_MATRIX){ 2314 /* compute rownz_max for preallocation */ 2315 for (imdex=0; imdex<nrecvs; imdex++){ 2316 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2317 rptr = rbuf_j[imdex]; 2318 for (i=0; i<j; i++){ 2319 ncols = rptr[i+1] - rptr[i]; 2320 if (rownz_max < ncols) rownz_max = ncols; 2321 } 2322 } 2323 2324 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2325 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2326 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2327 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2328 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2329 } else { 2330 C = *matredundant; 2331 } 2332 2333 /* insert local matrix entries */ 2334 rptr = sbuf_j; 2335 cols = sbuf_j + rend-rstart + 1; 2336 vals = sbuf_a; 2337 for (i=0; i<rend-rstart; i++){ 2338 row = i + rstart; 2339 ncols = rptr[i+1] - rptr[i]; 2340 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2341 vals += ncols; 2342 cols += ncols; 2343 } 2344 /* insert received matrix entries */ 2345 for (imdex=0; imdex<nrecvs; imdex++){ 2346 rstart = rowrange[recv_rank[imdex]]; 2347 rend = rowrange[recv_rank[imdex]+1]; 2348 rptr = rbuf_j[imdex]; 2349 cols = rbuf_j[imdex] + rend-rstart + 1; 2350 vals = rbuf_a[imdex]; 2351 for (i=0; i<rend-rstart; i++){ 2352 row = i + rstart; 2353 ncols = rptr[i+1] - rptr[i]; 2354 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2355 vals += ncols; 2356 cols += ncols; 2357 } 2358 } 2359 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2360 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2361 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2362 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2363 if (reuse == MAT_INITIAL_MATRIX){ 2364 PetscContainer container; 2365 *matredundant = C; 2366 /* create a supporting struct and attach it to C for reuse */ 2367 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2368 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2369 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2370 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2371 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2372 2373 redund->nzlocal = nzlocal; 2374 redund->nsends = nsends; 2375 redund->nrecvs = nrecvs; 2376 redund->send_rank = send_rank; 2377 redund->recv_rank = recv_rank; 
    redund->sbuf_nz   = sbuf_nz;
    redund->rbuf_nz   = rbuf_nz;
    redund->sbuf_j    = sbuf_j;
    redund->sbuf_a    = sbuf_a;
    redund->rbuf_j    = rbuf_j;
    redund->rbuf_a    = rbuf_a;

    redund->MatDestroy = C->ops->destroy;
    C->ops->destroy    = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {
    ierr = PetscFree(idxb);CHKERRQ(ierr);
  }
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    /* loop over the local rows; v holds rmap->n local entries */
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {
    ierr = PetscFree(idxb);CHKERRQ(ierr);
  }
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  /* the work vectors are sequential and must live on a one-process communicator */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for(r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(diagV);CHKERRQ(ierr);
  ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  /* the work vectors are sequential and must live on a one-process communicator */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for(r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(diagV);CHKERRQ(ierr);
  ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
MatSOR_MPIAIJ, 2591 MatTranspose_MPIAIJ, 2592 /*15*/ MatGetInfo_MPIAIJ, 2593 MatEqual_MPIAIJ, 2594 MatGetDiagonal_MPIAIJ, 2595 MatDiagonalScale_MPIAIJ, 2596 MatNorm_MPIAIJ, 2597 /*20*/ MatAssemblyBegin_MPIAIJ, 2598 MatAssemblyEnd_MPIAIJ, 2599 MatSetOption_MPIAIJ, 2600 MatZeroEntries_MPIAIJ, 2601 /*24*/ MatZeroRows_MPIAIJ, 2602 0, 2603 #ifdef PETSC_HAVE_PBGL 2604 0, 2605 #else 2606 0, 2607 #endif 2608 0, 2609 0, 2610 /*29*/ MatSetUpPreallocation_MPIAIJ, 2611 #ifdef PETSC_HAVE_PBGL 2612 0, 2613 #else 2614 0, 2615 #endif 2616 0, 2617 0, 2618 0, 2619 /*34*/ MatDuplicate_MPIAIJ, 2620 0, 2621 0, 2622 0, 2623 0, 2624 /*39*/ MatAXPY_MPIAIJ, 2625 MatGetSubMatrices_MPIAIJ, 2626 MatIncreaseOverlap_MPIAIJ, 2627 MatGetValues_MPIAIJ, 2628 MatCopy_MPIAIJ, 2629 /*44*/ MatGetRowMax_MPIAIJ, 2630 MatScale_MPIAIJ, 2631 0, 2632 0, 2633 0, 2634 /*49*/ MatSetBlockSize_MPIAIJ, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*54*/ MatFDColoringCreate_MPIAIJ, 2640 0, 2641 MatSetUnfactored_MPIAIJ, 2642 MatPermute_MPIAIJ, 2643 0, 2644 /*59*/ MatGetSubMatrix_MPIAIJ, 2645 MatDestroy_MPIAIJ, 2646 MatView_MPIAIJ, 2647 0, 2648 0, 2649 /*64*/ 0, 2650 0, 2651 0, 2652 0, 2653 0, 2654 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2655 MatGetRowMinAbs_MPIAIJ, 2656 0, 2657 MatSetColoring_MPIAIJ, 2658 #if defined(PETSC_HAVE_ADIC) 2659 MatSetValuesAdic_MPIAIJ, 2660 #else 2661 0, 2662 #endif 2663 MatSetValuesAdifor_MPIAIJ, 2664 /*75*/ MatFDColoringApply_AIJ, 2665 0, 2666 0, 2667 0, 2668 0, 2669 /*80*/ 0, 2670 0, 2671 0, 2672 /*83*/ MatLoad_MPIAIJ, 2673 0, 2674 0, 2675 0, 2676 0, 2677 0, 2678 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2679 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2680 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2681 MatPtAP_Basic, 2682 MatPtAPSymbolic_MPIAIJ, 2683 /*94*/ MatPtAPNumeric_MPIAIJ, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /*99*/ 0, 2689 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2690 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2691 MatConjugate_MPIAIJ, 2692 0, 2693 /*104*/MatSetValuesRow_MPIAIJ, 2694 MatRealPart_MPIAIJ, 2695 MatImaginaryPart_MPIAIJ, 2696 0, 2697 0, 2698 /*109*/0, 2699 MatGetRedundantMatrix_MPIAIJ, 2700 MatGetRowMin_MPIAIJ, 2701 0, 2702 0, 2703 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2704 0, 2705 0, 2706 0, 2707 0, 2708 /*119*/0, 2709 0, 2710 0, 2711 0 2712 }; 2713 2714 /* ----------------------------------------------------------------------------------------*/ 2715 2716 EXTERN_C_BEGIN 2717 #undef __FUNCT__ 2718 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2719 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2720 { 2721 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2722 PetscErrorCode ierr; 2723 2724 PetscFunctionBegin; 2725 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2726 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2727 PetscFunctionReturn(0); 2728 } 2729 EXTERN_C_END 2730 2731 EXTERN_C_BEGIN 2732 #undef __FUNCT__ 2733 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2734 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2735 { 2736 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2737 PetscErrorCode ierr; 2738 2739 PetscFunctionBegin; 2740 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2741 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2742 PetscFunctionReturn(0); 2743 } 2744 EXTERN_C_END 2745 2746 #include "petscpc.h" 2747 EXTERN_C_BEGIN 2748 #undef __FUNCT__ 2749 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2750 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2751 { 2752 Mat_MPIAIJ *b; 2753 PetscErrorCode ierr; 2754 PetscInt i; 2755 
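  /* Reached through the public MatMPIAIJSetPreallocation().  A hedged usage
     sketch (hypothetical 2D 5-point-stencil matrix: at most 5 nonzeros per
     local row, of which at most 2 can fall in the off-diagonal block):

         ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
         ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
         ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
         ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

     d_nz/d_nnz preallocate the sequential diagonal block b->A and o_nz/o_nnz
     the off-diagonal block b->B, both created below on PETSC_COMM_SELF. */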
2756 PetscFunctionBegin; 2757 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2758 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2759 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2760 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2761 2762 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2763 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2764 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2765 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2766 if (d_nnz) { 2767 for (i=0; i<B->rmap->n; i++) { 2768 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2769 } 2770 } 2771 if (o_nnz) { 2772 for (i=0; i<B->rmap->n; i++) { 2773 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2774 } 2775 } 2776 b = (Mat_MPIAIJ*)B->data; 2777 2778 if (!B->preallocated) { 2779 /* Explicitly create 2 MATSEQAIJ matrices. */ 2780 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2781 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2782 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2783 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2784 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2785 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2786 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2787 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2788 } 2789 2790 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2791 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2792 B->preallocated = PETSC_TRUE; 2793 PetscFunctionReturn(0); 2794 } 2795 EXTERN_C_END 2796 2797 #undef __FUNCT__ 2798 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2799 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2800 { 2801 Mat mat; 2802 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2803 PetscErrorCode ierr; 2804 2805 PetscFunctionBegin; 2806 *newmat = 0; 2807 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2808 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2809 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2810 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2811 a = (Mat_MPIAIJ*)mat->data; 2812 2813 mat->factortype = matin->factortype; 2814 mat->rmap->bs = matin->rmap->bs; 2815 mat->assembled = PETSC_TRUE; 2816 mat->insertmode = NOT_SET_VALUES; 2817 mat->preallocated = PETSC_TRUE; 2818 2819 a->size = oldmat->size; 2820 a->rank = oldmat->rank; 2821 a->donotstash = oldmat->donotstash; 2822 a->roworiented = oldmat->roworiented; 2823 a->rowindices = 0; 2824 a->rowvalues = 0; 2825 a->getrowactive = PETSC_FALSE; 2826 2827 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2828 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2829 2830 if (oldmat->colmap) { 2831 #if defined (PETSC_USE_CTABLE) 2832 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2833 #else 2834 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2835 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2836 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2837 #endif 2838 } else a->colmap = 0; 2839 if (oldmat->garray) { 2840 PetscInt len; 2841 len = oldmat->B->cmap->n; 2842 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2843 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2844 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2845 } else a->garray = 0; 2846 2847 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2848 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2849 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2850 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2851 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2852 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2853 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2854 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2855 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2856 *newmat = mat; 2857 PetscFunctionReturn(0); 2858 } 2859 2860 #undef __FUNCT__ 2861 #define __FUNCT__ "MatLoad_MPIAIJ" 2862 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, Mat newMat) 2863 { 2864 PetscScalar *vals,*svals; 2865 MPI_Comm comm = ((PetscObject)viewer)->comm; 2866 MPI_Status status; 2867 PetscErrorCode ierr; 2868 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2869 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 2870 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2871 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2872 PetscInt cend,cstart,n,*rowners,sizesset=1; 2873 int fd; 2874 2875 PetscFunctionBegin; 2876 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2877 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2878 if (!rank) { 2879 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2880 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2881 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2882 } 2883 2884 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 2885 2886 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2887 M = header[1]; N = header[2]; 2888 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2889 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 2890 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 2891 2892 /* If global sizes are set, check if they are consistent with that given in the file */ 2893 if (sizesset) { 2894 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 2895 } 2896 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2897 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2898 2899 /* determine ownership of all rows */ 2900 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 2901 else m = newMat->rmap->n; /* Set by user */ 2902 2903 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2904 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2905 2906 /* First process 
needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
      ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    mpicnt = PetscMPIIntCast(m);
    ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      mpicnt = PetscMPIIntCast(nz);
      ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    mpicnt = PetscMPIIntCast(nz);
    ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
    if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);
    else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);
    else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, 
determining number of off diagonal entries */ 2997 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2998 jj = 0; 2999 for (i=0; i<m; i++) { 3000 for (j=0; j<ourlens[i]; j++) { 3001 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3002 jj++; 3003 } 3004 } 3005 3006 for (i=0; i<m; i++) { 3007 ourlens[i] -= offlens[i]; 3008 } 3009 if (!sizesset) { 3010 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3011 } 3012 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3013 3014 for (i=0; i<m; i++) { 3015 ourlens[i] += offlens[i]; 3016 } 3017 3018 if (!rank) { 3019 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3020 3021 /* read in my part of the matrix numerical values */ 3022 nz = procsnz[0]; 3023 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3024 3025 /* insert into matrix */ 3026 jj = rstart; 3027 smycols = mycols; 3028 svals = vals; 3029 for (i=0; i<m; i++) { 3030 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3031 smycols += ourlens[i]; 3032 svals += ourlens[i]; 3033 jj++; 3034 } 3035 3036 /* read in other processors and ship out */ 3037 for (i=1; i<size; i++) { 3038 nz = procsnz[i]; 3039 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3040 mpicnt = PetscMPIIntCast(nz); 3041 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3042 } 3043 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3044 } else { 3045 /* receive numeric values */ 3046 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3047 3048 /* receive message of values*/ 3049 mpicnt = PetscMPIIntCast(nz); 3050 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm,&status);CHKERRQ(ierr); 3051 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3052 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 3053 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 3054 else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 3055 3056 /* insert into matrix */ 3057 jj = rstart; 3058 smycols = mycols; 3059 svals = vals; 3060 for (i=0; i<m; i++) { 3061 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3062 smycols += ourlens[i]; 3063 svals += ourlens[i]; 3064 jj++; 3065 } 3066 } 3067 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3068 ierr = PetscFree(vals);CHKERRQ(ierr); 3069 ierr = PetscFree(mycols);CHKERRQ(ierr); 3070 ierr = PetscFree(rowners);CHKERRQ(ierr); 3071 3072 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3073 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3074 PetscFunctionReturn(0); 3075 } 3076 3077 #undef __FUNCT__ 3078 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3079 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3080 { 3081 PetscErrorCode ierr; 3082 IS iscol_local; 3083 PetscInt csize; 3084 3085 PetscFunctionBegin; 3086 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3087 if (call == MAT_REUSE_MATRIX) { 3088 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3089 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in 
was not used before, cannot reuse"); 3090 } else { 3091 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3092 } 3093 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3094 if (call == MAT_INITIAL_MATRIX) { 3095 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3096 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3097 } 3098 PetscFunctionReturn(0); 3099 } 3100 3101 #undef __FUNCT__ 3102 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3103 /* 3104 Not great since it makes two copies of the submatrix, first an SeqAIJ 3105 in local and then by concatenating the local matrices the end result. 3106 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3107 3108 Note: This requires a sequential iscol with all indices. 3109 */ 3110 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3111 { 3112 PetscErrorCode ierr; 3113 PetscMPIInt rank,size; 3114 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3115 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3116 Mat *local,M,Mreuse; 3117 MatScalar *vwork,*aa; 3118 MPI_Comm comm = ((PetscObject)mat)->comm; 3119 Mat_SeqAIJ *aij; 3120 3121 3122 PetscFunctionBegin; 3123 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3124 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3125 3126 if (call == MAT_REUSE_MATRIX) { 3127 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3128 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3129 local = &Mreuse; 3130 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3131 } else { 3132 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3133 Mreuse = *local; 3134 ierr = PetscFree(local);CHKERRQ(ierr); 3135 } 3136 3137 /* 3138 m - number of local rows 3139 n - number of columns (same on all processors) 3140 rstart - first row in new global matrix generated 3141 */ 3142 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3143 if (call == MAT_INITIAL_MATRIX) { 3144 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3145 ii = aij->i; 3146 jj = aij->j; 3147 3148 /* 3149 Determine the number of non-zeros in the diagonal and off-diagonal 3150 portions of the matrix in order to do correct preallocation 3151 */ 3152 3153 /* first get start and end of "diagonal" columns */ 3154 if (csize == PETSC_DECIDE) { 3155 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3156 if (mglobal == n) { /* square matrix */ 3157 nlocal = m; 3158 } else { 3159 nlocal = n/size + ((n % size) > rank); 3160 } 3161 } else { 3162 nlocal = csize; 3163 } 3164 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3165 rstart = rend - nlocal; 3166 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3167 3168 /* next, compute all the lengths */ 3169 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3170 olens = dlens + m; 3171 for (i=0; i<m; i++) { 3172 jend = ii[i+1] - ii[i]; 3173 olen = 0; 3174 dlen = 0; 3175 for (j=0; j<jend; j++) { 3176 if (*jj < rstart || *jj >= rend) olen++; 3177 else dlen++; 3178 jj++; 3179 } 3180 olens[i] = olen; 3181 dlens[i] = dlen; 3182 } 3183 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3184 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3185 ierr = 
MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3186 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3187 ierr = PetscFree(dlens);CHKERRQ(ierr); 3188 } else { 3189 PetscInt ml,nl; 3190 3191 M = *newmat; 3192 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3193 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3194 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3195 /* 3196 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3197 rather than the slower MatSetValues(). 3198 */ 3199 M->was_assembled = PETSC_TRUE; 3200 M->assembled = PETSC_FALSE; 3201 } 3202 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3203 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3204 ii = aij->i; 3205 jj = aij->j; 3206 aa = aij->a; 3207 for (i=0; i<m; i++) { 3208 row = rstart + i; 3209 nz = ii[i+1] - ii[i]; 3210 cwork = jj; jj += nz; 3211 vwork = aa; aa += nz; 3212 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3213 } 3214 3215 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3216 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3217 *newmat = M; 3218 3219 /* save submatrix used in processor for next request */ 3220 if (call == MAT_INITIAL_MATRIX) { 3221 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3222 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3223 } 3224 3225 PetscFunctionReturn(0); 3226 } 3227 3228 EXTERN_C_BEGIN 3229 #undef __FUNCT__ 3230 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3231 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3232 { 3233 PetscInt m,cstart, cend,j,nnz,i,d; 3234 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3235 const PetscInt *JJ; 3236 PetscScalar *values; 3237 PetscErrorCode ierr; 3238 3239 PetscFunctionBegin; 3240 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3241 3242 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3243 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3244 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3245 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3246 m = B->rmap->n; 3247 cstart = B->cmap->rstart; 3248 cend = B->cmap->rend; 3249 rstart = B->rmap->rstart; 3250 3251 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3252 3253 #if defined(PETSC_USE_DEBUG) 3254 for (i=0; i<m; i++) { 3255 nnz = Ii[i+1]- Ii[i]; 3256 JJ = J + Ii[i]; 3257 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3258 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]); 3259 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3260 } 3261 #endif 3262 3263 for (i=0; i<m; i++) { 3264 nnz = Ii[i+1]- Ii[i]; 3265 JJ = J + Ii[i]; 3266 nnz_max = PetscMax(nnz_max,nnz); 3267 d = 0; 3268 for (j=0; j<nnz; j++) { 3269 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3270 } 3271 d_nnz[i] = d; 3272 o_nnz[i] = nnz - d; 3273 } 3274 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3275 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3276 3277 if (v) values = (PetscScalar*)v; 3278 else { 3279 ierr =
PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3280 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3281 } 3282 3283 for (i=0; i<m; i++) { 3284 ii = i + rstart; 3285 nnz = Ii[i+1]- Ii[i]; 3286 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3287 } 3288 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3289 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3290 3291 if (!v) { 3292 ierr = PetscFree(values);CHKERRQ(ierr); 3293 } 3294 PetscFunctionReturn(0); 3295 } 3296 EXTERN_C_END 3297 3298 #undef __FUNCT__ 3299 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3300 /*@ 3301 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3302 (the default parallel PETSc format). 3303 3304 Collective on MPI_Comm 3305 3306 Input Parameters: 3307 + B - the matrix 3308 . i - the indices into j for the start of each local row (starts with zero) 3309 . j - the column indices for each local row (starts with zero) 3310 - v - optional values in the matrix 3311 3312 Level: developer 3313 3314 Notes: 3315 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3316 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3317 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3318 3319 The i and j indices are 0 based, and the entries of i are offsets into the local j array. 3320 3321 The format used for the sparse matrix input is equivalent to a 3322 row-major ordering, i.e., for the following matrix the expected input data is 3323 as shown: 3324 3325 1 0 0 3326 2 0 3 P0 3327 ------- 3328 4 5 6 P1 3329 3330 Process0 [P0]: rows_owned=[0,1] 3331 i = {0,1,3} [size = nrow+1 = 2+1] 3332 j = {0,0,2} [size = nz = 3] 3333 v = {1,2,3} [size = nz = 3] 3334 3335 Process1 [P1]: rows_owned=[2] 3336 i = {0,3} [size = nrow+1 = 1+1] 3337 j = {0,1,2} [size = nz = 3] 3338 v = {4,5,6} [size = nz = 3] 3339 3340 .keywords: matrix, aij, compressed row, sparse, parallel 3341 3342 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3343 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3344 @*/ 3345 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3346 { 3347 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3348 3349 PetscFunctionBegin; 3350 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3351 if (f) { 3352 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3353 } 3354 PetscFunctionReturn(0); 3355 } 3356 3357 #undef __FUNCT__ 3358 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3359 /*@C 3360 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3361 (the default parallel PETSc format). For good matrix assembly performance 3362 the user should preallocate the matrix storage by setting the parameters 3363 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3364 performance can be increased by more than a factor of 50. 3365 3366 Collective on MPI_Comm 3367 3368 Input Parameters: 3369 + A - the matrix 3370 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3371 (same value is used for all local rows) 3372 .
d_nnz - array containing the number of nonzeros in the various rows of the 3373 DIAGONAL portion of the local submatrix (possibly different for each row) 3374 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3375 The size of this array is equal to the number of local rows, i.e., 'm'. 3376 You must leave room for the diagonal entry even if it is zero. 3377 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3378 submatrix (same value is used for all local rows). 3379 - o_nnz - array containing the number of nonzeros in the various rows of the 3380 OFF-DIAGONAL portion of the local submatrix (possibly different for 3381 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3382 structure. The size of this array is equal to the number 3383 of local rows, i.e., 'm'. 3384 3385 If the *_nnz parameter is given then the *_nz parameter is ignored 3386 3387 The AIJ format (also called the Yale sparse matrix format or 3388 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3389 storage. The stored row and column indices begin with zero. See the users manual for details. 3390 3391 The parallel matrix is partitioned such that the first m0 rows belong to 3392 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3393 to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'. 3394 3395 The DIAGONAL portion of the local submatrix of a processor can be defined 3396 as the submatrix which is obtained by extracting the part corresponding 3397 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3398 first row that belongs to the processor, and r2 is the last row belonging 3399 to this processor. This is a square m x m matrix. The remaining portion 3400 of the local submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 3401 3402 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3403 3404 You can call MatGetInfo() to get information on how effective the preallocation was; 3405 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3406 You can also run with the option -info and look for messages with the string 3407 malloc in them to see if additional memory allocation was needed. 3408 3409 Example usage: 3410 3411 Consider the following 8x8 matrix with 34 non-zero values, that is 3412 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3413 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3414 as follows: 3415 3416 .vb 3417 1 2 0 | 0 3 0 | 0 4 3418 Proc0 0 5 6 | 7 0 0 | 8 0 3419 9 0 10 | 11 0 0 | 12 0 3420 ------------------------------------- 3421 13 0 14 | 15 16 17 | 0 0 3422 Proc1 0 18 0 | 19 20 21 | 0 0 3423 0 0 0 | 22 23 0 | 24 0 3424 ------------------------------------- 3425 Proc2 25 26 27 | 0 0 28 | 29 0 3426 30 0 0 | 31 32 33 | 0 34 3427 .ve 3428 3429 This can be represented as a collection of submatrices as: 3430 3431 .vb 3432 A B C 3433 D E F 3434 G H I 3435 .ve 3436 3437 Where the submatrices A,B,C are owned by proc0, D,E,F are 3438 owned by proc1, G,H,I are owned by proc2. 3439 3440 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3441 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3442 The 'M','N' parameters are 8,8, and have the same values on all procs. 3443 3444 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3445 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3446 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
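
   As a concrete sketch of where this routine fits in matrix creation (here A,
   comm, m, n, M, N, d_nnz, and o_nnz are placeholder names), the usual sequence is
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
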
3447 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3448 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ 3449 matrix, and [DF] as another SeqAIJ matrix. 3450 3451 When d_nz, o_nz parameters are specified, d_nz storage elements are 3452 allocated for every row of the local diagonal submatrix, and o_nz 3453 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3454 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3455 row of the local DIAGONAL and OFF-DIAGONAL submatrices. 3456 In this case, the values of d_nz,o_nz are: 3457 .vb 3458 proc0 : d_nz = 2, o_nz = 2 3459 proc1 : d_nz = 3, o_nz = 2 3460 proc2 : d_nz = 1, o_nz = 4 3461 .ve 3462 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3463 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3464 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3465 34 values. 3466 3467 When d_nnz, o_nnz parameters are specified, the storage is specified 3468 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3469 In the above case the values for d_nnz,o_nnz are: 3470 .vb 3471 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3472 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3473 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3474 .ve 3475 Here the space allocated is the sum of all the above values, i.e., 34, and 3476 hence the preallocation is perfect. 3477 3478 Level: intermediate 3479 3480 .keywords: matrix, aij, compressed row, sparse, parallel 3481 3482 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3483 MPIAIJ, MatGetInfo() 3484 @*/ 3485 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3486 { 3487 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3488 3489 PetscFunctionBegin; 3490 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3491 if (f) { 3492 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3493 } 3494 PetscFunctionReturn(0); 3495 } 3496 3497 #undef __FUNCT__ 3498 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3499 /*@ 3500 MatCreateMPIAIJWithArrays - Creates an MPIAIJ matrix using arrays that contain the 3501 local rows in standard CSR format. 3502 3503 Collective on MPI_Comm 3504 3505 Input Parameters: 3506 + comm - MPI communicator 3507 . m - number of local rows (Cannot be PETSC_DECIDE) 3508 . n - This value should be the same as the local size used in creating the 3509 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3510 calculated if N is given) For square matrices n is almost always m. 3511 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3512 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3513 . i - row indices 3514 . j - column indices 3515 - a - matrix values 3516 3517 Output Parameter: 3518 . mat - the matrix 3519 3520 Level: intermediate 3521 3522 Notes: 3523 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3524 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3525 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3526 3527 The i and j indices are 0 based, and the entries of i are offsets into the local j array.
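
   A minimal call sketch (placeholder names; each process passes the CSR triplet
   for the rows it owns, as in the example below) is
.vb
     Mat A;
     ierr = MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve
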
3528 3529 The format used for the sparse matrix input is equivalent to a 3530 row-major ordering, i.e., for the following matrix the expected input data is 3531 as shown: 3532 3533 1 0 0 3534 2 0 3 P0 3535 ------- 3536 4 5 6 P1 3537 3538 Process0 [P0]: rows_owned=[0,1] 3539 i = {0,1,3} [size = nrow+1 = 2+1] 3540 j = {0,0,2} [size = nz = 3] 3541 v = {1,2,3} [size = nz = 3] 3542 3543 Process1 [P1]: rows_owned=[2] 3544 i = {0,3} [size = nrow+1 = 1+1] 3545 j = {0,1,2} [size = nz = 3] 3546 v = {4,5,6} [size = nz = 3] 3547 3548 .keywords: matrix, aij, compressed row, sparse, parallel 3549 3550 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3551 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3552 @*/ 3553 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3554 { 3555 PetscErrorCode ierr; 3556 3557 PetscFunctionBegin; 3558 if (i[0]) { 3559 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3560 } 3561 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3562 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3563 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3564 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3565 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3566 PetscFunctionReturn(0); 3567 } 3568 3569 #undef __FUNCT__ 3570 #define __FUNCT__ "MatCreateMPIAIJ" 3571 /*@C 3572 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3573 (the default parallel PETSc format). For good matrix assembly performance 3574 the user should preallocate the matrix storage by setting the parameters 3575 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3576 performance can be increased by more than a factor of 50. 3577 3578 Collective on MPI_Comm 3579 3580 Input Parameters: 3581 + comm - MPI communicator 3582 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3583 This value should be the same as the local size used in creating the 3584 y vector for the matrix-vector product y = Ax. 3585 . n - This value should be the same as the local size used in creating the 3586 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3587 calculated if N is given) For square matrices n is almost always m. 3588 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3589 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3590 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3591 (same value is used for all local rows) 3592 . d_nnz - array containing the number of nonzeros in the various rows of the 3593 DIAGONAL portion of the local submatrix (possibly different for each row) 3594 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3595 The size of this array is equal to the number of local rows, i.e., 'm'. 3596 You must leave room for the diagonal entry even if it is zero. 3597 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3598 submatrix (same value is used for all local rows).
3599 - o_nnz - array containing the number of nonzeros in the various rows of the 3600 OFF-DIAGONAL portion of the local submatrix (possibly different for 3601 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3602 structure. The size of this array is equal to the number 3603 of local rows, i.e., 'm'. 3604 3605 Output Parameter: 3606 . A - the matrix 3607 3608 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3609 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3610 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3611 3612 Notes: 3613 If the *_nnz parameter is given then the *_nz parameter is ignored 3614 3615 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3616 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3617 storage requirements for this matrix. 3618 3619 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3620 processor then it must be used on all processors that share the object for 3621 that argument. 3622 3623 The user MUST specify either the local or global matrix dimensions 3624 (possibly both). 3625 3626 The parallel matrix is partitioned across processors such that the 3627 first m0 rows belong to process 0, the next m1 rows belong to 3628 process 1, the next m2 rows belong to process 2, etc., where 3629 m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores 3630 values corresponding to an [m x N] submatrix. 3631 3632 The columns are logically partitioned with the n0 columns belonging 3633 to the 0th partition, the next n1 columns belonging to the next 3634 partition, etc., where n0,n1,n2,... are given by the input parameter 'n'. 3635 3636 The DIAGONAL portion of the local submatrix on any given processor 3637 is the [m x n] submatrix whose rows and columns are both owned by 3638 that processor, i.e., the diagonal submatrix on 3639 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1], 3640 etc. The remaining portion of the local submatrix [m x (N-n)] 3641 constitutes the OFF-DIAGONAL portion. The example below better 3642 illustrates this concept. 3643 3644 For a square global matrix we define each processor's diagonal portion 3645 to be its local rows and the corresponding columns (a square submatrix); 3646 each processor's off-diagonal portion encompasses the remainder of the 3647 local matrix (a rectangular submatrix). 3648 3649 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3650 3651 When calling this routine with a single process communicator, a matrix of 3652 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3653 type of communicator, use the construction mechanism: 3654 MatCreate(...,&A); MatSetSizes(A,m,n,M,N); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 3655 3656 By default, this format uses inodes (identical nodes) when possible. 3657 We search for consecutive rows with the same nonzero structure, thereby 3658 reusing matrix information to achieve increased efficiency. 3659 3660 Options Database Keys: 3661 + -mat_no_inode - Do not use inodes 3662 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3663 - -mat_aij_oneindex - Internally use indexing starting at 1 3664 rather than 0. Note that when calling MatSetValues(), 3665 the user still MUST index entries starting at 0! 3666 3667 3668 Example usage: 3669 3670 Consider the following 8x8 matrix with 34 non-zero values, that is 3671 assembled across 3 processors.
Let's assume that proc0 owns 3 rows, 3672 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3673 as follows: 3674 3675 .vb 3676 1 2 0 | 0 3 0 | 0 4 3677 Proc0 0 5 6 | 7 0 0 | 8 0 3678 9 0 10 | 11 0 0 | 12 0 3679 ------------------------------------- 3680 13 0 14 | 15 16 17 | 0 0 3681 Proc1 0 18 0 | 19 20 21 | 0 0 3682 0 0 0 | 22 23 0 | 24 0 3683 ------------------------------------- 3684 Proc2 25 26 27 | 0 0 28 | 29 0 3685 30 0 0 | 31 32 33 | 0 34 3686 .ve 3687 3688 This can be represented as a collection of submatrices as: 3689 3690 .vb 3691 A B C 3692 D E F 3693 G H I 3694 .ve 3695 3696 Where the submatrices A,B,C are owned by proc0, D,E,F are 3697 owned by proc1, G,H,I are owned by proc2. 3698 3699 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3700 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3701 The 'M','N' parameters are 8,8, and have the same values on all procs. 3702 3703 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3704 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3705 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3706 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3707 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ 3708 matrix, and [DF] as another SeqAIJ matrix. 3709 3710 When d_nz, o_nz parameters are specified, d_nz storage elements are 3711 allocated for every row of the local diagonal submatrix, and o_nz 3712 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3713 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3714 row of the local DIAGONAL and OFF-DIAGONAL submatrices. 3715 In this case, the values of d_nz,o_nz are: 3716 .vb 3717 proc0 : d_nz = 2, o_nz = 2 3718 proc1 : d_nz = 3, o_nz = 2 3719 proc2 : d_nz = 1, o_nz = 4 3720 .ve 3721 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3722 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3723 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3724 34 values. 3725 3726 When d_nnz, o_nnz parameters are specified, the storage is specified 3727 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3728 In the above case the values for d_nnz,o_nnz are: 3729 .vb 3730 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3731 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3732 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3733 .ve 3734 Here the space allocated is the sum of all the above values, i.e., 34, and 3735 hence the preallocation is perfect.
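
   For instance, with the values listed above, the call made on proc0 of this
   example would be (a sketch; A is a placeholder name):
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
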
3736 3737 Level: intermediate 3738 3739 .keywords: matrix, aij, compressed row, sparse, parallel 3740 3741 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3742 MPIAIJ, MatCreateMPIAIJWithArrays() 3743 @*/ 3744 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3745 { 3746 PetscErrorCode ierr; 3747 PetscMPIInt size; 3748 3749 PetscFunctionBegin; 3750 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3751 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3752 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3753 if (size > 1) { 3754 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3755 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3756 } else { 3757 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3758 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3759 } 3760 PetscFunctionReturn(0); 3761 } 3762 3763 #undef __FUNCT__ 3764 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3765 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3766 { 3767 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3768 3769 PetscFunctionBegin; 3770 *Ad = a->A; 3771 *Ao = a->B; 3772 *colmap = a->garray; 3773 PetscFunctionReturn(0); 3774 } 3775 3776 #undef __FUNCT__ 3777 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3778 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3779 { 3780 PetscErrorCode ierr; 3781 PetscInt i; 3782 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3783 3784 PetscFunctionBegin; 3785 if (coloring->ctype == IS_COLORING_GLOBAL) { 3786 ISColoringValue *allcolors,*colors; 3787 ISColoring ocoloring; 3788 3789 /* set coloring for diagonal portion */ 3790 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3791 3792 /* set coloring for off-diagonal portion */ 3793 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3794 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3795 for (i=0; i<a->B->cmap->n; i++) { 3796 colors[i] = allcolors[a->garray[i]]; 3797 } 3798 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3799 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3800 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3801 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3802 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3803 ISColoringValue *colors; 3804 PetscInt *larray; 3805 ISColoring ocoloring; 3806 3807 /* set coloring for diagonal portion */ 3808 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3809 for (i=0; i<a->A->cmap->n; i++) { 3810 larray[i] = i + A->cmap->rstart; 3811 } 3812 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3813 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3814 for (i=0; i<a->A->cmap->n; i++) { 3815 colors[i] = coloring->colors[larray[i]]; 3816 } 3817 ierr = PetscFree(larray);CHKERRQ(ierr); 3818 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3819 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3820 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3821 3822 /* set coloring for off-diagonal portion */ 3823 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3824 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3825 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3826 for (i=0; i<a->B->cmap->n; i++) { 3827 colors[i] = coloring->colors[larray[i]]; 3828 } 3829 ierr = PetscFree(larray);CHKERRQ(ierr); 3830 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3831 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3832 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3833 } else { 3834 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype); 3835 } 3836 3837 PetscFunctionReturn(0); 3838 } 3839 3840 #if defined(PETSC_HAVE_ADIC) 3841 #undef __FUNCT__ 3842 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3843 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3844 { 3845 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3846 PetscErrorCode ierr; 3847 3848 PetscFunctionBegin; 3849 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3850 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3851 PetscFunctionReturn(0); 3852 } 3853 #endif 3854 3855 #undef __FUNCT__ 3856 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3857 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3858 { 3859 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3860 PetscErrorCode ierr; 3861 3862 PetscFunctionBegin; 3863 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3864 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3865 PetscFunctionReturn(0); 3866 } 3867 3868 #undef __FUNCT__ 3869 #define __FUNCT__ "MatMerge" 3870 /*@ 3871 MatMerge - Creates a single large PETSc matrix by concatenating sequential 3872 matrices from each processor 3873 3874 Collective on MPI_Comm 3875 3876 Input Parameters: 3877 + comm - the communicator the parallel matrix will live on 3878 . inmat - the input sequential matrix on each process 3879 . n - number of local columns (or PETSC_DECIDE) 3880 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3881 3882 Output Parameter: 3883 . outmat - the parallel matrix generated 3884 3885 Level: advanced 3886 3887 Notes: The number of columns of the matrix in EACH processor MUST be the same. The input matrix inmat is destroyed once the merge is complete.
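
   A typical calling sequence (a sketch; localmat is a placeholder for the
   sequential matrix held by each process):
.vb
     Mat globalmat;
     ierr = MatMerge(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalmat);CHKERRQ(ierr);
     /* localmat has been destroyed by the call above */
.ve
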
3888 3889 @*/ 3890 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3891 { 3892 PetscErrorCode ierr; 3893 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3894 PetscInt *indx; 3895 PetscScalar *values; 3896 3897 PetscFunctionBegin; 3898 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3899 if (scall == MAT_INITIAL_MATRIX){ 3900 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3901 if (n == PETSC_DECIDE){ 3902 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3903 } 3904 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3905 rstart -= m; 3906 3907 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3908 for (i=0;i<m;i++) { 3909 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3910 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3911 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3912 } 3913 /* This routine will ONLY return MPIAIJ type matrix */ 3914 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3915 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3916 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3917 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3918 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3919 3920 } else if (scall == MAT_REUSE_MATRIX){ 3921 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3922 } else { 3923 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3924 } 3925 3926 for (i=0;i<m;i++) { 3927 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3928 Ii = i + rstart; 3929 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3930 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3931 } 3932 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3933 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3934 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3935 3936 PetscFunctionReturn(0); 3937 } 3938 3939 #undef __FUNCT__ 3940 #define __FUNCT__ "MatFileSplit" 3941 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3942 { 3943 PetscErrorCode ierr; 3944 PetscMPIInt rank; 3945 PetscInt m,N,i,rstart,nnz; 3946 size_t len; 3947 const PetscInt *indx; 3948 PetscViewer out; 3949 char *name; 3950 Mat B; 3951 const PetscScalar *values; 3952 3953 PetscFunctionBegin; 3954 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3955 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3956 /* Should this be the type of the diagonal block of A? 
*/ 3957 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3958 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3959 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3960 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3961 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3962 for (i=0;i<m;i++) { 3963 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3964 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3965 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3966 } 3967 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3968 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3969 3970 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3971 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3972 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3973 sprintf(name,"%s.%d",outfile,rank); 3974 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3975 ierr = PetscFree(name);CHKERRQ(ierr); 3976 ierr = MatView(B,out);CHKERRQ(ierr); 3977 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3978 ierr = MatDestroy(B);CHKERRQ(ierr); 3979 PetscFunctionReturn(0); 3980 } 3981 3982 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3983 #undef __FUNCT__ 3984 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3985 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3986 { 3987 PetscErrorCode ierr; 3988 Mat_Merge_SeqsToMPI *merge; 3989 PetscContainer container; 3990 3991 PetscFunctionBegin; 3992 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3993 if (container) { 3994 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3995 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3996 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3997 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3998 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3999 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4000 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4001 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4002 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4003 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4004 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4005 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4006 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4007 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4008 4009 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4010 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4011 /* merge is only valid when the container was found, so free it here */ 4012 ierr = PetscFree(merge);CHKERRQ(ierr); 4013 } 4014 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4015 PetscFunctionReturn(0); 4016 } 4017 4018 #include "../src/mat/utils/freespace.h" 4019 #include "petscbt.h" 4020 4021 #undef __FUNCT__ 4022 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4023 /*@C 4024 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together sequential 4025 matrices from each processor 4026 4027 Collective on MPI_Comm 4028 4029 Input Parameters: 4030 + comm - the communicator the parallel matrix will live on 4031 . seqmat - the input sequential matrix on each process 4032 . m - number of local rows (or PETSC_DECIDE) 4033 . n - number of local columns (or PETSC_DECIDE) 4034 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4035 4036 Output Parameter: 4037 . mpimat - the parallel matrix generated 4038 4039 Level: advanced 4040 4041 Notes: 4042 The dimensions of the sequential matrix in each processor MUST be the same.
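
   A sketch of the intended use (placeholder names); MAT_INITIAL_MATRIX performs
   both the symbolic and numeric phases, while MAT_REUSE_MATRIX repeats only the
   numeric phase for a matrix with an unchanged nonzero pattern:
.vb
     Mat mpimat;
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* ... update the numerical values of seqmat in place ... */
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve
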
4043 The input seqmat is included into the container "MatMergeSeqsToMPI", and will be 4044 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4045 @*/ 4046 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4047 { 4048 PetscErrorCode ierr; 4049 MPI_Comm comm=((PetscObject)mpimat)->comm; 4050 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4051 PetscMPIInt size,rank,taga,*len_s; 4052 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4053 PetscInt proc,m; 4054 PetscInt **buf_ri,**buf_rj; 4055 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4056 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4057 MPI_Request *s_waits,*r_waits; 4058 MPI_Status *status; 4059 MatScalar *aa=a->a; 4060 MatScalar **abuf_r,*ba_i; 4061 Mat_Merge_SeqsToMPI *merge; 4062 PetscContainer container; 4063 4064 PetscFunctionBegin; 4065 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4066 4067 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4068 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4069 4070 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4071 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix was not created by MatMerge_SeqsToMPISymbolic()"); 4072 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4073 4074 bi = merge->bi; 4075 bj = merge->bj; 4076 buf_ri = merge->buf_ri; 4077 buf_rj = merge->buf_rj; 4078 4079 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4080 owners = merge->rowmap->range; 4081 len_s = merge->len_s; 4082 4083 /* send and recv matrix values */ 4084 /*-----------------------------*/ 4085 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4086 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4087 4088 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4089 for (proc=0,k=0; proc<size; proc++){ 4090 if (!len_s[proc]) continue; 4091 i = owners[proc]; 4092 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4093 k++; 4094 } 4095 4096 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4097 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4098 ierr = PetscFree(status);CHKERRQ(ierr); 4099 4100 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4101 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4102 4103 /* insert mat values of mpimat */ 4104 /*----------------------------*/ 4105 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4106 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4107 4108 for (k=0; k<merge->nrecv; k++){ 4109 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4110 nrows = *(buf_ri_k[k]); 4111 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 4112 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the i-structure of the k-th received message */ 4113 } 4114 4115 /* set values of ba */ 4116 m = merge->rowmap->n; 4117 for (i=0; i<m; i++) { 4118 arow = owners[rank] + i; 4119 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4120 bnzi = bi[i+1] - bi[i]; 4121 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4122 4123 /* add local non-zero vals of this proc's seqmat into ba */ 4124 anzi = ai[arow+1] - ai[arow]; 4125 aj = a->j + ai[arow]; 4126 aa = a->a + ai[arow]; 4127 nextaj = 0; 4128 for (j=0; nextaj<anzi; j++){ 4129 if (*(bj_i
+ j) == aj[nextaj]){ /* bcol == acol */ 4130 ba_i[j] += aa[nextaj++]; 4131 } 4132 } 4133 4134 /* add received vals into ba */ 4135 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4136 /* i-th row */ 4137 if (i == *nextrow[k]) { 4138 anzi = *(nextai[k]+1) - *nextai[k]; 4139 aj = buf_rj[k] + *(nextai[k]); 4140 aa = abuf_r[k] + *(nextai[k]); 4141 nextaj = 0; 4142 for (j=0; nextaj<anzi; j++){ 4143 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4144 ba_i[j] += aa[nextaj++]; 4145 } 4146 } 4147 nextrow[k]++; nextai[k]++; 4148 } 4149 } 4150 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4151 } 4152 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4153 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4154 4155 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4156 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4157 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4158 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4159 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4160 PetscFunctionReturn(0); 4161 } 4162 4163 #undef __FUNCT__ 4164 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4165 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4166 { 4167 PetscErrorCode ierr; 4168 Mat B_mpi; 4169 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4170 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4171 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4172 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4173 PetscInt len,proc,*dnz,*onz; 4174 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4175 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4176 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4177 MPI_Status *status; 4178 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4179 PetscBT lnkbt; 4180 Mat_Merge_SeqsToMPI *merge; 4181 PetscContainer container; 4182 4183 PetscFunctionBegin; 4184 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4185 4186 /* make sure it is a PETSc comm */ 4187 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4188 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4189 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4190 4191 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4192 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4193 4194 /* determine row ownership */ 4195 /*---------------------------------------------------------*/ 4196 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4197 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4198 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4199 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4200 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4201 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4202 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4203 4204 m = merge->rowmap->n; 4205 M = merge->rowmap->N; 4206 owners = merge->rowmap->range; 4207 4208 /* determine the number of messages to send, their lengths */ 4209 /*---------------------------------------------------------*/ 4210 len_s = merge->len_s; 4211 4212 len = 0; /* length of buf_si[] */ 4213 merge->nsend = 0; 4214 for (proc=0; proc<size; proc++){ 4215 len_si[proc] = 0; 4216 if (proc == rank){ 4217 len_s[proc] = 0; 4218 } else { 4219 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4220 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */ 4221 } 4222 if (len_s[proc]) { 4223 merge->nsend++; 4224 nrows = 0; 4225 for (i=owners[proc]; i<owners[proc+1]; i++){ 4226 if (ai[i+1] > ai[i]) nrows++; 4227 } 4228 len_si[proc] = 2*(nrows+1); 4229 len += len_si[proc]; 4230 } 4231 } 4232 4233 /* determine the number and length of messages to receive for ij-structure */ 4234 /*-------------------------------------------------------------------------*/ 4235 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4236 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4237 4238 /* post the Irecv of j-structure */ 4239 /*-------------------------------*/ 4240 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4241 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4242 4243 /* post the Isend of j-structure */ 4244 /*--------------------------------*/ 4245 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4246 4247 for (proc=0, k=0; proc<size; proc++){ 4248 if (!len_s[proc]) continue; 4249 i = owners[proc]; 4250 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4251 k++; 4252 } 4253 4254 /* receives and sends of j-structure are complete */ 4255 /*------------------------------------------------*/ 4256 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4257 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4258 4259 /* send and recv i-structure */ 4260 /*---------------------------*/ 4261 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4262 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4263 4264 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4265 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4266 for (proc=0,k=0; proc<size; proc++){ 4267 if (!len_s[proc]) continue; 4268 /* form outgoing message for i-structure: 4269 buf_si[0]: nrows to be sent 4270 [1:nrows]: local row index 4271 [nrows+1:2*nrows+1]: i-structure index 4272 */ 4273 /*-------------------------------------------*/ 4274 nrows = len_si[proc]/2 - 1; 4275 buf_si_i = buf_si + nrows+1; 4276 buf_si[0] = nrows; 4277 buf_si_i[0] = 0; 4278 nrows = 0; 4279 for (i=owners[proc]; i<owners[proc+1]; i++){ 4280 anzi = ai[i+1] - ai[i]; 4281 if (anzi) { 4282 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4283 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4284 nrows++; 4285 } 4286 } 4287 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4288 k++; 4289 buf_si += len_si[proc]; 4290 } 4291 4292 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4293 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4294 4295 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4296 for (i=0; i<merge->nrecv; i++){ 4297 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4298 } 4299 4300 ierr = PetscFree(len_si);CHKERRQ(ierr); 4301 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4302 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4303 ierr =
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4304 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4305 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4306 ierr = PetscFree(status);CHKERRQ(ierr); 4307 4308 /* compute a local seq matrix in each processor */ 4309 /*----------------------------------------------*/ 4310 /* allocate bi array and free space for accumulating nonzero column info */ 4311 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4312 bi[0] = 0; 4313 4314 /* create and initialize a linked list */ 4315 nlnk = N+1; 4316 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4317 4318 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4319 4320 len = ai[owners[rank+1]] - ai[owners[rank]]; 4321 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4322 current_space = free_space; 4323 4324 /* determine symbolic info for each local row */ 4325 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4326 4327 for (k=0; k<merge->nrecv; k++){ 4328 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4329 nrows = *buf_ri_k[k]; 4330 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4331 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the i-structure of the k-th received message */ 4332 } 4333 4334 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4335 len = 0; 4336 for (i=0;i<m;i++) { 4337 bnzi = 0; 4338 /* add local non-zero cols of this proc's seqmat into lnk */ 4339 arow = owners[rank] + i; 4340 anzi = ai[arow+1] - ai[arow]; 4341 aj = a->j + ai[arow]; 4342 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4343 bnzi += nlnk; 4344 /* add received col data into lnk */ 4345 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4346 if (i == *nextrow[k]) { /* i-th row */ 4347 anzi = *(nextai[k]+1) - *nextai[k]; 4348 aj = buf_rj[k] + *nextai[k]; 4349 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4350 bnzi += nlnk; 4351 nextrow[k]++; nextai[k]++; 4352 } 4353 } 4354 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4355 4356 /* if free space is not available, make more free space */ 4357 if (current_space->local_remaining<bnzi) { 4358 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4359 nspacedouble++; 4360 } 4361 /* copy data into free space, then initialize lnk */ 4362 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4363 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4364 4365 current_space->array += bnzi; 4366 current_space->local_used += bnzi; 4367 current_space->local_remaining -= bnzi; 4368 4369 bi[i+1] = bi[i] + bnzi; 4370 } 4371 4372 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4373 4374 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4375 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4376 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4377 4378 /* create symbolic parallel matrix B_mpi */ 4379 /*---------------------------------------*/ 4380 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4381 if (n==PETSC_DECIDE) { 4382 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4383 } else { 4384 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4385 } 4386 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4387 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4388 ierr =
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4389 4390 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4391 B_mpi->assembled = PETSC_FALSE; 4392 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4393 merge->bi = bi; 4394 merge->bj = bj; 4395 merge->buf_ri = buf_ri; 4396 merge->buf_rj = buf_rj; 4397 merge->coi = PETSC_NULL; 4398 merge->coj = PETSC_NULL; 4399 merge->owners_co = PETSC_NULL; 4400 4401 /* attach the supporting struct to B_mpi for reuse */ 4402 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4403 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4404 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4405 *mpimat = B_mpi; 4406 4407 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4408 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4409 PetscFunctionReturn(0); 4410 } 4411 4412 #undef __FUNCT__ 4413 #define __FUNCT__ "MatMerge_SeqsToMPI" 4414 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4415 { 4416 PetscErrorCode ierr; 4417 4418 PetscFunctionBegin; 4419 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4420 if (scall == MAT_INITIAL_MATRIX){ 4421 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4422 } 4423 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4424 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4425 PetscFunctionReturn(0); 4426 } 4427 4428 #undef __FUNCT__ 4429 #define __FUNCT__ "MatGetLocalMat" 4430 /*@ 4431 MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of A 4432 4433 Not Collective 4434 4435 Input Parameters: 4436 + A - the matrix 4437 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4438 4439 Output Parameter: 4440 .
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@
   MatGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all of its local rows

   Not Collective

   Input Parameters:
+  A - the matrix
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A with global columns below the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A with global columns past the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A with global columns below the diagonal block */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A with global columns past the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
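/*
   Editor's note: a minimal usage sketch of MatGetLocalMat(), added for
   illustration; it is not part of the original source. A is assumed to be an
   assembled MPIAIJ matrix; Aloc then has A->rmap->n rows and A->cmap->N
   columns, and a MAT_REUSE_MATRIX call refreshes only the numerical values:

     Mat Aloc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     ... change the numerical values of A, keeping its nonzero pattern ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/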
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
   MatGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all of its local rows and its NONZERO columns only

   Not Collective

   Input Parameters:
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
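/*
   Editor's note: a minimal usage sketch of MatGetLocalMatCondensed(), added
   for illustration; it is not part of the original source. Passing PETSC_NULL
   for row and col lets the routine take all local rows and the nonzero
   columns of the assumed MPIAIJ matrix A:

     Mat Aloc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/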
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
   MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+  rowb, colb - index sets of rows and columns of B to extract
.  brstart - row index of B_seq at which B's local rows begin; the next B->rmap->n rows of B_seq are taken from B's local rows
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
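/*
   Editor's note: a minimal usage sketch of MatGetBrowsOfAcols(), added for
   illustration; it is not part of the original source. A and B are assumed
   to be MPIAIJ matrices with compatible layouts; the index sets created by
   the first call are handed back in for the MAT_REUSE_MATRIX call:

     IS       rowb,colb;
     PetscInt brstart;
     Mat      Bseq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
     ... change the numerical values of B, keeping its nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(Bseq);CHKERRQ(ierr);
*/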
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
   MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
   of the OFF-DIAGONAL portion of the local part of A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.  startsj - starting points of the outgoing messages in B's j- and a-arrays, saved for MAT_REUSE (or PETSC_NULL)
.  startsj_r - similar to startsj, for the incoming messages
-  bufa_ptr - buffer for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)

   Output Parameter:
.  B_oth - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=((PetscObject)ctx)->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
  svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;   /* local row indices to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing messages */
    ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
    sstartsj[0] = 0; rstartsj[0] = 0;
    len = 0; /* total length of the j or a array to be sent */
    k   = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++){
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
          rowlen[j*sbs+l] = ncols;
          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len; /* starting point of the (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of the j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of the (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for the j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing j-array messages */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing a-array messages */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr); /* the send buffer is not returned to the caller */
    } else {
      *startsj   = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
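/*
   Editor's note: a minimal usage sketch of MatGetBrowsOfAoCols(), added for
   illustration; it is not part of the original source. The buffers returned
   through startsj, startsj_r, and bufa let a MAT_REUSE_MATRIX call move over
   new numerical values of B without recomputing the communication pattern:

     PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     Mat       Both;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
     ... change the numerical values of B, keeping its nonzero pattern ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
     ierr = PetscFree2(startsj,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(Both);CHKERRQ(ierr);
*/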
#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

   Not Collective

   Input Parameter:
.  A - The matrix in mpiaij format

   Output Parameters:
+  lvec - The local vector holding off-process values from the argument to a matrix-vector product
.  colmap - A map from global column index to local index into lvec
-  multScatter - A scatter from the argument of a matrix-vector product to lvec

   Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes C = A*B as (B'*A')', since computing A*B directly with A MPIDense and B MPIAIJ is untenable

           n         p            p
         (   )     (   )        (   )
       m ( A )  * n( B )   =  m ( C )
         (   )     (   )        (   )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
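/*
   Editor's note: an illustrative sketch, not part of the original source.
   With A of type MPIDENSE and B of type MPIAIJ, a MatMatMult() call is
   expected to reach the routines above through the
   "MatMatMult_mpidense_mpiaij_C" function composed onto the matrix in
   MatCreate_MPIAIJ() below:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/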
EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.  -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data      = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix-vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                           "MatGetFactor_aij_mumps",
                                           MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                           "MatGetFactor_mpiaij_pastix",
                                           MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                           "MatConvert_MPIAIJ_MPICSRPERM",
                                           MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                           "MatConvert_MPIAIJ_MPICRL",
                                           MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                           "MatConvert_MPIAIJ_MPISBAIJ",
                                           MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
   MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
   and "off-diagonal" parts of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for the "diagonal" portion of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for the "off-diagonal" portion of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
   The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.

   The i and j indices are 0 based

   See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix

   This sets local rows and cannot be used to set off-processor values.

   You cannot later use MatSetValues() to change values in this matrix.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
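/*
   Editor's note: a minimal sketch of MatCreateMPIAIJWithSplitArrays(), added
   for illustration; it is not part of the original source. On two processes
   (rank assumed to hold the result of MPI_Comm_rank) it assembles the 2x2
   global matrix [1 2; 3 4] with one row per process. Note that j uses local
   column indices while oj uses global ones, and that the arrays must stay
   valid for the life of the matrix since they are not copied:

     PetscInt    di[2] = {0,1},dj[1] = {0};   diagonal part, local column index
     PetscInt    oi[2] = {0,1},oj[1];         off-diagonal part, global column index
     PetscScalar da[1],oa[1];
     Mat         M;
     if (!rank) {da[0] = 1.0; oj[0] = 1; oa[0] = 2.0;}
     else       {da[0] = 4.0; oj[0] = 0; oa[0] = 3.0;}
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,di,dj,da,oi,oj,oa,&M);CHKERRQ(ierr);
*/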
/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  ierr = MatPreallocated(mat);CHKERRQ(ierr);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
    PetscInt   i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscTruth roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend){
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined (PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   note ba must be refreshed from the new b before rp2/ap2 are computed */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else {
        if (!aij->donotstash) {
          if (roworiented) {
            ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          } else {
            ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END