1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "petscblaslapack.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatDistribute_MPIAIJ" 8 /* 9 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 10 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 11 12 Only for square matrices 13 */ 14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 15 { 16 PetscMPIInt rank,size; 17 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 18 PetscErrorCode ierr; 19 Mat mat; 20 Mat_SeqAIJ *gmata; 21 PetscMPIInt tag; 22 MPI_Status status; 23 PetscTruth aij; 24 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 25 26 PetscFunctionBegin; 27 CHKMEMQ; 28 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 29 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 30 if (!rank) { 31 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 32 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 33 } 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 36 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 37 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 38 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 39 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 40 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 41 rowners[0] = 0; 42 for (i=2; i<=size; i++) { 43 rowners[i] += rowners[i-1]; 44 } 45 rstart = rowners[rank]; 46 rend = rowners[rank+1]; 47 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 48 if (!rank) { 49 gmata = (Mat_SeqAIJ*) gmat->data; 50 /* send row lengths to all processors */ 51 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 52 for (i=1; i<size; i++) { 53 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 54 } 55 /* determine number diagonal and off-diagonal counts */ 56 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 58 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 59 jj = 0; 60 for (i=0; i<m; i++) { 61 for (j=0; j<dlens[i]; j++) { 62 if (gmata->j[jj] < rstart) ld[i]++; 63 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 64 jj++; 65 } 66 } 67 /* send column indices to other processes */ 68 for (i=1; i<size; i++) { 69 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 70 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 72 } 73 74 /* send numerical values to other processes */ 75 for (i=1; i<size; i++) { 76 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 77 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 78 } 79 gmataa = gmata->a; 80 gmataj = gmata->j; 81 82 } else { 83 /* receive row lengths */ 84 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 85 /* receive column indices */ 86 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 87 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 88 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 89 /* determine number diagonal and off-diagonal counts */ 90 ierr = 
PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap->n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap->n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap->n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global 
column number to the local number in the off-diagonal part of the local
    storage of the matrix. When PETSC_USE_CTABLE is used this is scalable, at
    a slightly higher hash table cost; without it, it is not scalable (each processor
    has an order-N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col; \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1  = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i]  = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col; \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2  = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i]  = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode 
MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 276 { 277 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 278 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 279 PetscErrorCode ierr; 280 PetscInt l,*garray = mat->garray,diag; 281 282 PetscFunctionBegin; 283 /* code only works for square matrices A */ 284 285 /* find size of row to the left of the diagonal part */ 286 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 287 row = row - diag; 288 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 289 if (garray[b->j[b->i[row]+l]] > diag) break; 290 } 291 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 292 293 /* diagonal part */ 294 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 295 296 /* right of diagonal part */ 297 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 298 PetscFunctionReturn(0); 299 } 300 301 #undef __FUNCT__ 302 #define __FUNCT__ "MatSetValues_MPIAIJ" 303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 306 PetscScalar value; 307 PetscErrorCode ierr; 308 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 309 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 310 PetscTruth roworiented = aij->roworiented; 311 312 /* Some Variables required in the macro */ 313 Mat A = aij->A; 314 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 315 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 316 MatScalar *aa = a->a; 317 PetscTruth ignorezeroentries = a->ignorezeroentries; 318 Mat B = aij->B; 319 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 320 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 321 MatScalar *ba = b->a; 322 323 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 324 PetscInt nonew = a->nonew; 325 MatScalar *ap1,*ap2; 326 327 PetscFunctionBegin; 328 if (v) PetscValidScalarPointer(v,6); 329 for (i=0; i<m; i++) { 330 if (im[i] < 0) continue; 331 #if defined(PETSC_USE_DEBUG) 332 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 333 #endif 334 if (im[i] >= rstart && im[i] < rend) { 335 row = im[i] - rstart; 336 lastcol1 = -1; 337 rp1 = aj + ai[row]; 338 ap1 = aa + ai[row]; 339 rmax1 = aimax[row]; 340 nrow1 = ailen[row]; 341 low1 = 0; 342 high1 = nrow1; 343 lastcol2 = -1; 344 rp2 = bj + bi[row]; 345 ap2 = ba + bi[row]; 346 rmax2 = bimax[row]; 347 nrow2 = bilen[row]; 348 low2 = 0; 349 high2 = nrow2; 350 351 for (j=0; j<n; j++) { 352 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 353 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 354 if (in[j] >= cstart && in[j] < cend){ 355 col = in[j] - cstart; 356 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 357 } else if (in[j] < 0) continue; 358 #if defined(PETSC_USE_DEBUG) 359 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 360 #endif 361 else { 362 if (mat->was_assembled) { 363 if (!aij->colmap) { 364 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 365 } 366 #if defined (PETSC_USE_CTABLE) 367 ierr = 
PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 368 col--; 369 #else 370 col = aij->colmap[in[j]] - 1; 371 #endif 372 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 373 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 374 col = in[j]; 375 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 376 B = aij->B; 377 b = (Mat_SeqAIJ*)B->data; 378 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 379 rp2 = bj + bi[row]; 380 ap2 = ba + bi[row]; 381 rmax2 = bimax[row]; 382 nrow2 = bilen[row]; 383 low2 = 0; 384 high2 = nrow2; 385 bm = aij->B->rmap->n; 386 ba = b->a; 387 } 388 } else col = in[j]; 389 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 390 } 391 } 392 } else { 393 if (!aij->donotstash) { 394 if (roworiented) { 395 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 396 } else { 397 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 398 } 399 } 400 } 401 } 402 PetscFunctionReturn(0); 403 } 404 405 #undef __FUNCT__ 406 #define __FUNCT__ "MatGetValues_MPIAIJ" 407 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 408 { 409 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 410 PetscErrorCode ierr; 411 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 412 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 413 414 PetscFunctionBegin; 415 for (i=0; i<m; i++) { 416 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 417 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 418 if (idxm[i] >= rstart && idxm[i] < rend) { 419 row = idxm[i] - rstart; 420 for (j=0; j<n; j++) { 421 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 422 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 423 if (idxn[j] >= cstart && idxn[j] < cend){ 424 col = idxn[j] - cstart; 425 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 426 } else { 427 if (!aij->colmap) { 428 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 429 } 430 #if defined (PETSC_USE_CTABLE) 431 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 432 col --; 433 #else 434 col = aij->colmap[idxn[j]] - 1; 435 #endif 436 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 437 else { 438 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 439 } 440 } 441 } 442 } else { 443 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 444 } 445 } 446 PetscFunctionReturn(0); 447 } 448 449 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 453 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 454 { 455 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 456 PetscErrorCode ierr; 457 PetscInt nstash,reallocs; 458 InsertMode addv; 459 460 PetscFunctionBegin; 461 if (aij->donotstash) { 462 PetscFunctionReturn(0); 463 } 464 465 /* make sure all processors are either in INSERTMODE or ADDMODE */ 466 ierr = 
MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 515 /* 516 if nonzero structure of submatrix B cannot change then we know that 517 no processor disassembled thus we can skip this stuff 518 */ 519 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 520 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 521 if (mat->was_assembled && !other_disassembled) { 522 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 523 } 524 } 525 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 526 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 527 } 528 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 529 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 530 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 531 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 532 533 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 534 aij->rowvalues = 0; 535 536 /* used by MatAXPY() */ 537 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 538 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 539 540 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 541 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 542 PetscFunctionReturn(0); 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 547 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 548 { 549 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 550 PetscErrorCode ierr; 551 552 PetscFunctionBegin; 553 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 554 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 555 PetscFunctionReturn(0); 556 } 557 558 #undef __FUNCT__ 559 #define __FUNCT__ "MatZeroRows_MPIAIJ" 560 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 561 { 562 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 563 PetscErrorCode ierr; 564 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 565 PetscInt i,*owners = A->rmap->range; 566 PetscInt *nprocs,j,idx,nsends,row; 567 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 568 PetscInt *rvalues,count,base,slen,*source; 569 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 570 MPI_Comm comm = ((PetscObject)A)->comm; 571 MPI_Request *send_waits,*recv_waits; 572 MPI_Status recv_status,*send_status; 573 #if defined(PETSC_DEBUG) 574 PetscTruth found = PETSC_FALSE; 575 #endif 576 577 PetscFunctionBegin; 578 /* first count number of contributors to each processor */ 579 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 580 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 581 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 582 j = 0; 583 for (i=0; i<N; i++) { 584 if (lastidx > (idx = rows[i])) j = 0; 585 lastidx = idx; 586 for (; j<size; j++) { 587 if (idx >= owners[j] && idx < owners[j+1]) { 588 nprocs[2*j]++; 589 nprocs[2*j+1] = 1; 590 owner[i] = j; 591 #if defined(PETSC_DEBUG) 592 found = PETSC_TRUE; 593 #endif 594 break; 595 } 596 } 597 #if defined(PETSC_DEBUG) 598 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 599 found = PETSC_FALSE; 600 #endif 601 } 602 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 603 604 /* inform other processors of number of messages and max length*/ 605 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 606 607 /* post receives: */ 608 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 609 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 610 for (i=0; i<nrecvs; i++) { 611 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 612 } 613 614 /* do sends: 615 1) starts[i] gives the starting index in svalues for stuff going to 616 the ith processor 617 */ 618 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 619 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 620 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 621 starts[0] = 0; 622 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 623 for (i=0; i<N; i++) { 624 svalues[starts[owner[i]]++] = rows[i]; 625 } 626 627 starts[0] = 0; 628 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 629 count = 0; 630 for (i=0; i<size; i++) { 631 if (nprocs[2*i+1]) { 632 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 633 } 634 } 635 ierr = PetscFree(starts);CHKERRQ(ierr); 636 637 base = owners[rank]; 638 639 /* wait on receives */ 640 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 641 count = nrecvs; slen = 0; 642 while (count) { 643 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 644 /* unpack receives into our local space */ 645 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 646 source[imdex] = recv_status.MPI_SOURCE; 647 lens[imdex] = n; 648 slen += n; 649 count--; 650 } 651 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 652 653 /* move the data into the send scatter */ 654 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 655 count = 0; 656 for (i=0; i<nrecvs; i++) { 657 values = rvalues + i*nmax; 658 for (j=0; j<lens[i]; j++) { 659 lrows[count++] = values[j] - base; 660 } 661 } 662 ierr = PetscFree(rvalues);CHKERRQ(ierr); 663 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 664 ierr = PetscFree(owner);CHKERRQ(ierr); 665 ierr = PetscFree(nprocs);CHKERRQ(ierr); 666 667 /* actually zap the local rows */ 668 /* 669 Zero the required rows. If the "diagonal block" of the matrix 670 is square and the user wishes to set the diagonal we use separate 671 code so that MatSetValues() is not called for each diagonal allocating 672 new memory, thus calling lots of mallocs and slowing things down. 
673 674 */ 675 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 676 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 677 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 678 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 679 } else if (diag != 0.0) { 680 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 681 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 682 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 683 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 684 } 685 for (i = 0; i < slen; i++) { 686 row = lrows[i] + rstart; 687 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 688 } 689 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 690 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 691 } else { 692 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 693 } 694 ierr = PetscFree(lrows);CHKERRQ(ierr); 695 696 /* wait on sends */ 697 if (nsends) { 698 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 699 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 700 ierr = PetscFree(send_status);CHKERRQ(ierr); 701 } 702 ierr = PetscFree(send_waits);CHKERRQ(ierr); 703 ierr = PetscFree(svalues);CHKERRQ(ierr); 704 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatMult_MPIAIJ" 710 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 711 { 712 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 713 PetscErrorCode ierr; 714 PetscInt nt; 715 716 PetscFunctionBegin; 717 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 718 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 719 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 720 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 721 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 722 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 723 PetscFunctionReturn(0); 724 } 725 726 #undef __FUNCT__ 727 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 728 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 729 { 730 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 731 PetscErrorCode ierr; 732 733 PetscFunctionBegin; 734 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 735 PetscFunctionReturn(0); 736 } 737 738 #undef __FUNCT__ 739 #define __FUNCT__ "MatMultAdd_MPIAIJ" 740 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 741 { 742 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 743 PetscErrorCode ierr; 744 745 PetscFunctionBegin; 746 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 747 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 748 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 749 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 750 PetscFunctionReturn(0); 751 } 752 753 #undef __FUNCT__ 754 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 755 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 756 { 757 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 758 PetscErrorCode ierr; 759 PetscTruth merged; 760 761 PetscFunctionBegin; 762 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 763 /* do nondiagonal part */ 764 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 765 if (!merged) { 766 /* send it on 
its way */ 767 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 768 /* do local part */ 769 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 770 /* receive remote parts: note this assumes the values are not actually */ 771 /* added in yy until the next line, */ 772 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 } else { 774 /* do local part */ 775 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 776 /* send it on its way */ 777 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 778 /* values actually were received in the Begin() but we need to call this nop */ 779 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 780 } 781 PetscFunctionReturn(0); 782 } 783 784 EXTERN_C_BEGIN 785 #undef __FUNCT__ 786 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 787 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 788 { 789 MPI_Comm comm; 790 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 791 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 792 IS Me,Notme; 793 PetscErrorCode ierr; 794 PetscInt M,N,first,last,*notme,i; 795 PetscMPIInt size; 796 797 PetscFunctionBegin; 798 799 /* Easy test: symmetric diagonal block */ 800 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 801 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 802 if (!*f) PetscFunctionReturn(0); 803 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 804 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 805 if (size == 1) PetscFunctionReturn(0); 806 807 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 808 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 809 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 810 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 811 for (i=0; i<first; i++) notme[i] = i; 812 for (i=last; i<M; i++) notme[i-last+first] = i; 813 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 814 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 815 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 816 Aoff = Aoffs[0]; 817 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 818 Boff = Boffs[0]; 819 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 820 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 821 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 822 ierr = ISDestroy(Me);CHKERRQ(ierr); 823 ierr = ISDestroy(Notme);CHKERRQ(ierr); 824 825 PetscFunctionReturn(0); 826 } 827 EXTERN_C_END 828 829 #undef __FUNCT__ 830 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 831 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 832 { 833 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 834 PetscErrorCode ierr; 835 836 PetscFunctionBegin; 837 /* do nondiagonal part */ 838 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 839 /* send it on its way */ 840 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 841 /* do local part */ 842 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 843 /* receive remote parts */ 844 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 848 /* 849 This only works correctly for square matrices where the subblock A->A is the 850 diagonal block 851 */ 852 #undef 
__FUNCT__ 853 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 854 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 855 { 856 PetscErrorCode ierr; 857 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 858 859 PetscFunctionBegin; 860 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 861 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 862 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatScale_MPIAIJ" 868 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 869 { 870 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 871 PetscErrorCode ierr; 872 873 PetscFunctionBegin; 874 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 875 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 876 PetscFunctionReturn(0); 877 } 878 879 #undef __FUNCT__ 880 #define __FUNCT__ "MatDestroy_MPIAIJ" 881 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 882 { 883 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 884 PetscErrorCode ierr; 885 886 PetscFunctionBegin; 887 #if defined(PETSC_USE_LOG) 888 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 889 #endif 890 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 891 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 892 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 893 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 894 #if defined (PETSC_USE_CTABLE) 895 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 896 #else 897 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 898 #endif 899 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 900 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 901 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 902 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 903 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 904 ierr = PetscFree(aij);CHKERRQ(ierr); 905 906 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 907 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 908 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 909 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 910 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 911 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 912 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 913 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 914 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 915 PetscFunctionReturn(0); 916 } 917 918 #undef __FUNCT__ 919 #define __FUNCT__ "MatView_MPIAIJ_Binary" 920 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 921 { 922 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 923 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 924 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 925 PetscErrorCode ierr; 926 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 927 int fd; 928 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 929 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = 
mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest amount any process needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
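  /* Gather the local numerical values in the same global-column order used for the
     column indices above: first the B (off-diagonal) entries whose global column
     precedes the diagonal block, then the A (diagonal block) entries, then the
     remaining B entries, so each row is written in ascending global column order. */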
cnt = 0; 1012 for (i=0; i<mat->rmap->n; i++) { 1013 for (j=B->i[i]; j<B->i[i+1]; j++) { 1014 if ( garray[B->j[j]] > cstart) break; 1015 column_values[cnt++] = B->a[j]; 1016 } 1017 for (k=A->i[i]; k<A->i[i+1]; k++) { 1018 column_values[cnt++] = A->a[k]; 1019 } 1020 for (; j<B->i[i+1]; j++) { 1021 column_values[cnt++] = B->a[j]; 1022 } 1023 } 1024 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1025 1026 /* store the column values to the file */ 1027 if (!rank) { 1028 MPI_Status status; 1029 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1030 for (i=1; i<size; i++) { 1031 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1032 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1033 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1034 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1035 } 1036 } else { 1037 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1038 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1039 } 1040 ierr = PetscFree(column_values);CHKERRQ(ierr); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1046 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1047 { 1048 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1049 PetscErrorCode ierr; 1050 PetscMPIInt rank = aij->rank,size = aij->size; 1051 PetscTruth isdraw,iascii,isbinary; 1052 PetscViewer sviewer; 1053 PetscViewerFormat format; 1054 1055 PetscFunctionBegin; 1056 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1057 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1058 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1059 if (iascii) { 1060 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1061 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1062 MatInfo info; 1063 PetscTruth inodes; 1064 1065 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1066 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1067 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1068 if (!inodes) { 1069 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1070 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1071 } else { 1072 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1073 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1074 } 1075 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1076 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1077 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1079 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1080 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in 
matrix-vector product: \n");CHKERRQ(ierr); 1081 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1082 PetscFunctionReturn(0); 1083 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1084 PetscInt inodecount,inodelimit,*inodes; 1085 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1086 if (inodes) { 1087 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1088 } else { 1089 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1090 } 1091 PetscFunctionReturn(0); 1092 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1093 PetscFunctionReturn(0); 1094 } 1095 } else if (isbinary) { 1096 if (size == 1) { 1097 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1098 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1099 } else { 1100 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1101 } 1102 PetscFunctionReturn(0); 1103 } else if (isdraw) { 1104 PetscDraw draw; 1105 PetscTruth isnull; 1106 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1107 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1108 } 1109 1110 if (size == 1) { 1111 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1112 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1113 } else { 1114 /* assemble the entire matrix onto first processor. */ 1115 Mat A; 1116 Mat_SeqAIJ *Aloc; 1117 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1118 MatScalar *a; 1119 1120 if (mat->rmap->N > 1024) { 1121 PetscTruth flg = PETSC_FALSE; 1122 1123 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1124 if (!flg) { 1125 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1126 } 1127 } 1128 1129 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1130 if (!rank) { 1131 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1132 } else { 1133 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1134 } 1135 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1136 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1137 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1138 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1139 1140 /* copy over the A part */ 1141 Aloc = (Mat_SeqAIJ*)aij->A->data; 1142 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1143 row = mat->rmap->rstart; 1144 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1145 for (i=0; i<m; i++) { 1146 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1147 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1148 } 1149 aj = Aloc->j; 1150 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1151 1152 /* copy over the B part */ 1153 Aloc = (Mat_SeqAIJ*)aij->B->data; 1154 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1155 row = mat->rmap->rstart; 1156 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1157 ct = cols; 1158 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1159 for (i=0; i<m; i++) { 1160 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1161 row++; a += ai[i+1]-ai[i]; 
cols += ai[i+1]-ai[i]; 1162 } 1163 ierr = PetscFree(ct);CHKERRQ(ierr); 1164 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1165 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1166 /* 1167 Everyone has to call to draw the matrix since the graphics waits are 1168 synchronized across all processors that share the PetscDraw object 1169 */ 1170 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1171 if (!rank) { 1172 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1173 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1174 } 1175 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1176 ierr = MatDestroy(A);CHKERRQ(ierr); 1177 } 1178 PetscFunctionReturn(0); 1179 } 1180 1181 #undef __FUNCT__ 1182 #define __FUNCT__ "MatView_MPIAIJ" 1183 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1184 { 1185 PetscErrorCode ierr; 1186 PetscTruth iascii,isdraw,issocket,isbinary; 1187 1188 PetscFunctionBegin; 1189 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1190 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1191 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1192 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1193 if (iascii || isdraw || isbinary || issocket) { 1194 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1195 } else { 1196 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1197 } 1198 PetscFunctionReturn(0); 1199 } 1200 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatSOR_MPIAIJ" 1203 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1204 { 1205 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1206 PetscErrorCode ierr; 1207 Vec bb1 = 0; 1208 PetscTruth hasop; 1209 1210 PetscFunctionBegin; 1211 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1212 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1213 } 1214 1215 if (flag == SOR_APPLY_UPPER) { 1216 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 1220 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1221 if (flag & SOR_ZERO_INITIAL_GUESS) { 1222 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1223 its--; 1224 } 1225 1226 while (its--) { 1227 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1228 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 1230 /* update rhs: bb1 = bb - B*x */ 1231 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1232 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1233 1234 /* local sweep */ 1235 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1236 } 1237 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1238 if (flag & SOR_ZERO_INITIAL_GUESS) { 1239 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1240 its--; 1241 } 1242 while (its--) { 1243 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1244 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 1246 /* update 
rhs: bb1 = bb - B*x */ 1247 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1248 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1249 1250 /* local sweep */ 1251 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1252 } 1253 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1254 if (flag & SOR_ZERO_INITIAL_GUESS) { 1255 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1256 its--; 1257 } 1258 while (its--) { 1259 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1260 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1261 1262 /* update rhs: bb1 = bb - B*x */ 1263 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1264 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1265 1266 /* local sweep */ 1267 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1268 } 1269 } else if (flag & SOR_EISENSTAT) { 1270 Vec xx1; 1271 1272 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1273 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1274 1275 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1276 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1277 if (!mat->diag) { 1278 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1279 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1280 } 1281 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1282 if (hasop) { 1283 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1284 } else { 1285 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1286 } 1287 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1288 1289 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1290 1291 /* local sweep */ 1292 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1293 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1294 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1295 } else { 1296 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1297 } 1298 1299 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatPermute_MPIAIJ" 1305 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1306 { 1307 MPI_Comm comm,pcomm; 1308 PetscInt first,local_size,nrows; 1309 const PetscInt *rows; 1310 PetscMPIInt size; 1311 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1312 PetscErrorCode ierr; 1313 1314 PetscFunctionBegin; 1315 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1316 /* make a collective version of 'rowp' */ 1317 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1318 if (pcomm==comm) { 1319 crowp = rowp; 1320 } else { 1321 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1322 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1323 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1324 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1325 } 1326 /* collect the global row permutation and invert it */ 1327 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1328 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1329 if (pcomm!=comm) { 1330 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1331 } 1332 ierr = 
ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1333 /* get the local target indices */ 1334 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1335 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1336 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1337 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1338 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1339 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1340 /* the column permutation is so much easier; 1341 make a local version of 'colp' and invert it */ 1342 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1343 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1344 if (size==1) { 1345 lcolp = colp; 1346 } else { 1347 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1348 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1349 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1350 } 1351 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1352 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1353 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1354 if (size>1) { 1355 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1356 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1357 } 1358 /* now we just get the submatrix */ 1359 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1360 /* clean up */ 1361 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1362 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1368 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1369 { 1370 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1371 Mat A = mat->A,B = mat->B; 1372 PetscErrorCode ierr; 1373 PetscReal isend[5],irecv[5]; 1374 1375 PetscFunctionBegin; 1376 info->block_size = 1.0; 1377 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1378 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1379 isend[3] = info->memory; isend[4] = info->mallocs; 1380 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1381 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1382 isend[3] += info->memory; isend[4] += info->mallocs; 1383 if (flag == MAT_LOCAL) { 1384 info->nz_used = isend[0]; 1385 info->nz_allocated = isend[1]; 1386 info->nz_unneeded = isend[2]; 1387 info->memory = isend[3]; 1388 info->mallocs = isend[4]; 1389 } else if (flag == MAT_GLOBAL_MAX) { 1390 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1391 info->nz_used = irecv[0]; 1392 info->nz_allocated = irecv[1]; 1393 info->nz_unneeded = irecv[2]; 1394 info->memory = irecv[3]; 1395 info->mallocs = irecv[4]; 1396 } else if (flag == MAT_GLOBAL_SUM) { 1397 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1398 info->nz_used = irecv[0]; 1399 info->nz_allocated = irecv[1]; 1400 info->nz_unneeded = irecv[2]; 1401 info->memory = irecv[3]; 1402 info->mallocs = irecv[4]; 1403 } 1404 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1405 info->fill_ratio_needed = 0; 1406 info->factor_mallocs = 0; 1407 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatSetOption_MPIAIJ" 1413 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1414 { 1415 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1416 PetscErrorCode ierr; 1417 1418 PetscFunctionBegin; 1419 
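  /* Options that affect the nonzero structure or storage are forwarded to both the
     diagonal (A) and off-diagonal (B) sequential blocks; the symmetry-type flags apply
     only to the square diagonal block, MAT_IGNORE_OFF_PROC_ENTRIES turns off stashing
     of off-process entries, and MAT_NEW_DIAGONALS is ignored. */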
switch (op) { 1420 case MAT_NEW_NONZERO_LOCATIONS: 1421 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1422 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1423 case MAT_KEEP_NONZERO_PATTERN: 1424 case MAT_NEW_NONZERO_LOCATION_ERR: 1425 case MAT_USE_INODES: 1426 case MAT_IGNORE_ZERO_ENTRIES: 1427 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1428 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1429 break; 1430 case MAT_ROW_ORIENTED: 1431 a->roworiented = flg; 1432 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1433 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1434 break; 1435 case MAT_NEW_DIAGONALS: 1436 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1437 break; 1438 case MAT_IGNORE_OFF_PROC_ENTRIES: 1439 a->donotstash = PETSC_TRUE; 1440 break; 1441 case MAT_SYMMETRIC: 1442 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1443 break; 1444 case MAT_STRUCTURALLY_SYMMETRIC: 1445 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1446 break; 1447 case MAT_HERMITIAN: 1448 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1449 break; 1450 case MAT_SYMMETRY_ETERNAL: 1451 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1452 break; 1453 default: 1454 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatGetRow_MPIAIJ" 1461 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1462 { 1463 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1464 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1465 PetscErrorCode ierr; 1466 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1467 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1468 PetscInt *cmap,*idx_p; 1469 1470 PetscFunctionBegin; 1471 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1472 mat->getrowactive = PETSC_TRUE; 1473 1474 if (!mat->rowvalues && (idx || v)) { 1475 /* 1476 allocate enough space to hold information from the longest row. 
1477 */ 1478 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1479 PetscInt max = 1,tmp; 1480 for (i=0; i<matin->rmap->n; i++) { 1481 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1482 if (max < tmp) { max = tmp; } 1483 } 1484 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1485 } 1486 1487 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1488 lrow = row - rstart; 1489 1490 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1491 if (!v) {pvA = 0; pvB = 0;} 1492 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1493 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1494 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1495 nztot = nzA + nzB; 1496 1497 cmap = mat->garray; 1498 if (v || idx) { 1499 if (nztot) { 1500 /* Sort by increasing column numbers, assuming A and B already sorted */ 1501 PetscInt imark = -1; 1502 if (v) { 1503 *v = v_p = mat->rowvalues; 1504 for (i=0; i<nzB; i++) { 1505 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1506 else break; 1507 } 1508 imark = i; 1509 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1510 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1511 } 1512 if (idx) { 1513 *idx = idx_p = mat->rowindices; 1514 if (imark > -1) { 1515 for (i=0; i<imark; i++) { 1516 idx_p[i] = cmap[cworkB[i]]; 1517 } 1518 } else { 1519 for (i=0; i<nzB; i++) { 1520 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1521 else break; 1522 } 1523 imark = i; 1524 } 1525 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1526 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1527 } 1528 } else { 1529 if (idx) *idx = 0; 1530 if (v) *v = 0; 1531 } 1532 } 1533 *nz = nztot; 1534 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1535 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1541 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1542 { 1543 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1544 1545 PetscFunctionBegin; 1546 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1547 aij->getrowactive = PETSC_FALSE; 1548 PetscFunctionReturn(0); 1549 } 1550 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "MatNorm_MPIAIJ" 1553 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1554 { 1555 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1556 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1557 PetscErrorCode ierr; 1558 PetscInt i,j,cstart = mat->cmap->rstart; 1559 PetscReal sum = 0.0; 1560 MatScalar *v; 1561 1562 PetscFunctionBegin; 1563 if (aij->size == 1) { 1564 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1565 } else { 1566 if (type == NORM_FROBENIUS) { 1567 v = amat->a; 1568 for (i=0; i<amat->nz; i++) { 1569 #if defined(PETSC_USE_COMPLEX) 1570 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1571 #else 1572 sum += (*v)*(*v); v++; 1573 #endif 1574 } 1575 v = bmat->a; 1576 for (i=0; i<bmat->nz; i++) { 1577 #if defined(PETSC_USE_COMPLEX) 1578 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1579 #else 1580 sum += (*v)*(*v); v++; 1581 #endif 1582 } 1583 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1584 *norm = sqrt(*norm); 1585 } else if (type 
== NORM_1) { /* max column norm */ 1586 PetscReal *tmp,*tmp2; 1587 PetscInt *jj,*garray = aij->garray; 1588 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1589 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1590 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1591 *norm = 0.0; 1592 v = amat->a; jj = amat->j; 1593 for (j=0; j<amat->nz; j++) { 1594 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1595 } 1596 v = bmat->a; jj = bmat->j; 1597 for (j=0; j<bmat->nz; j++) { 1598 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1599 } 1600 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1601 for (j=0; j<mat->cmap->N; j++) { 1602 if (tmp2[j] > *norm) *norm = tmp2[j]; 1603 } 1604 ierr = PetscFree(tmp);CHKERRQ(ierr); 1605 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1606 } else if (type == NORM_INFINITY) { /* max row norm */ 1607 PetscReal ntemp = 0.0; 1608 for (j=0; j<aij->A->rmap->n; j++) { 1609 v = amat->a + amat->i[j]; 1610 sum = 0.0; 1611 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1612 sum += PetscAbsScalar(*v); v++; 1613 } 1614 v = bmat->a + bmat->i[j]; 1615 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1616 sum += PetscAbsScalar(*v); v++; 1617 } 1618 if (sum > ntemp) ntemp = sum; 1619 } 1620 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1621 } else { 1622 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1623 } 1624 } 1625 PetscFunctionReturn(0); 1626 } 1627 1628 #undef __FUNCT__ 1629 #define __FUNCT__ "MatTranspose_MPIAIJ" 1630 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1631 { 1632 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1633 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1634 PetscErrorCode ierr; 1635 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1636 PetscInt cstart=A->cmap->rstart,ncol; 1637 Mat B; 1638 MatScalar *array; 1639 1640 PetscFunctionBegin; 1641 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1642 1643 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1644 ai = Aloc->i; aj = Aloc->j; 1645 bi = Bloc->i; bj = Bloc->j; 1646 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1647 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1648 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1649 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1650 for (i=0; i<ai[ma]; i++){ 1651 d_nnz[aj[i]] ++; 1652 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1653 } 1654 1655 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1656 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1657 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1658 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1659 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1660 } else { 1661 B = *matout; 1662 } 1663 1664 /* copy over the A part */ 1665 array = Aloc->a; 1666 row = A->rmap->rstart; 1667 for (i=0; i<ma; i++) { 1668 ncol = ai[i+1]-ai[i]; 1669 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1670 row++; array += ncol; aj += ncol; 1671 } 1672 aj = Aloc->j; 1673 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1674 1675 /* copy over the B part */ 1676 ierr = 
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1677 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1678 array = Bloc->a; 1679 row = A->rmap->rstart; 1680 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1681 cols_tmp = cols; 1682 for (i=0; i<mb; i++) { 1683 ncol = bi[i+1]-bi[i]; 1684 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1685 row++; array += ncol; cols_tmp += ncol; 1686 } 1687 ierr = PetscFree(cols);CHKERRQ(ierr); 1688 1689 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1690 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1691 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1692 *matout = B; 1693 } else { 1694 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1695 } 1696 PetscFunctionReturn(0); 1697 } 1698 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1701 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1702 { 1703 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1704 Mat a = aij->A,b = aij->B; 1705 PetscErrorCode ierr; 1706 PetscInt s1,s2,s3; 1707 1708 PetscFunctionBegin; 1709 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1710 if (rr) { 1711 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1712 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1713 /* Overlap communication with computation. */ 1714 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1715 } 1716 if (ll) { 1717 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1718 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1719 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1720 } 1721 /* scale the diagonal block */ 1722 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1723 1724 if (rr) { 1725 /* Do a scatter end and then right scale the off-diagonal block */ 1726 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1727 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1728 } 1729 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1735 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1736 { 1737 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1738 PetscErrorCode ierr; 1739 1740 PetscFunctionBegin; 1741 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1742 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1743 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1744 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1745 PetscFunctionReturn(0); 1746 } 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1749 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1750 { 1751 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1752 PetscErrorCode ierr; 1753 1754 PetscFunctionBegin; 1755 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1756 PetscFunctionReturn(0); 1757 } 1758 1759 #undef __FUNCT__ 1760 #define __FUNCT__ "MatEqual_MPIAIJ" 1761 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1762 { 1763 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1764 Mat a,b,c,d; 1765 PetscTruth flg; 1766 PetscErrorCode ierr; 1767 1768 PetscFunctionBegin; 1769 a = matA->A; b = matA->B; 1770 c = matB->A; d = matB->B; 1771 1772 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1773 if (flg) { 1774 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1775 } 1776 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1777 
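/*
   Illustrative usage sketch, not part of the original source: because the per-block comparisons
   above are combined with an MPI_LAND reduction, MatEqual() is collective and returns the same
   global answer on every process. Assuming two assembled MATMPIAIJ matrices X and Y (hypothetical
   names):

     PetscTruth eq;
     ierr = MatEqual(X,Y,&eq);CHKERRQ(ierr);
     if (eq) {ierr = PetscPrintf(((PetscObject)X)->comm,"X and Y have identical entries\n");CHKERRQ(ierr);}
*/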
PetscFunctionReturn(0); 1778 } 1779 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "MatCopy_MPIAIJ" 1782 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1783 { 1784 PetscErrorCode ierr; 1785 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1786 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1787 1788 PetscFunctionBegin; 1789 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1790 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1791 /* because of the column compression in the off-processor part of the matrix a->B, 1792 the number of columns in a->B and b->B may be different, hence we cannot call 1793 the MatCopy() directly on the two parts. If need be, we can provide a more 1794 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1795 then copying the submatrices */ 1796 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1797 } else { 1798 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1799 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1800 } 1801 PetscFunctionReturn(0); 1802 } 1803 1804 #undef __FUNCT__ 1805 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1806 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1807 { 1808 PetscErrorCode ierr; 1809 1810 PetscFunctionBegin; 1811 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1812 PetscFunctionReturn(0); 1813 } 1814 1815 #undef __FUNCT__ 1816 #define __FUNCT__ "MatAXPY_MPIAIJ" 1817 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1818 { 1819 PetscErrorCode ierr; 1820 PetscInt i; 1821 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1822 PetscBLASInt bnz,one=1; 1823 Mat_SeqAIJ *x,*y; 1824 1825 PetscFunctionBegin; 1826 if (str == SAME_NONZERO_PATTERN) { 1827 PetscScalar alpha = a; 1828 x = (Mat_SeqAIJ *)xx->A->data; 1829 y = (Mat_SeqAIJ *)yy->A->data; 1830 bnz = PetscBLASIntCast(x->nz); 1831 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1832 x = (Mat_SeqAIJ *)xx->B->data; 1833 y = (Mat_SeqAIJ *)yy->B->data; 1834 bnz = PetscBLASIntCast(x->nz); 1835 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1836 } else if (str == SUBSET_NONZERO_PATTERN) { 1837 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1838 1839 x = (Mat_SeqAIJ *)xx->B->data; 1840 y = (Mat_SeqAIJ *)yy->B->data; 1841 if (y->xtoy && y->XtoY != xx->B) { 1842 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1843 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1844 } 1845 if (!y->xtoy) { /* get xtoy */ 1846 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1847 y->XtoY = xx->B; 1848 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1849 } 1850 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1851 } else { 1852 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1853 } 1854 PetscFunctionReturn(0); 1855 } 1856 1857 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatConjugate_MPIAIJ" 1861 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1862 { 1863 #if defined(PETSC_USE_COMPLEX) 1864 PetscErrorCode ierr; 1865 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1866 1867 PetscFunctionBegin; 1868 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1869 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1870 #else 1871 PetscFunctionBegin; 1872 #endif 1873 PetscFunctionReturn(0); 1874 } 1875 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatRealPart_MPIAIJ" 1878 PetscErrorCode 
MatRealPart_MPIAIJ(Mat A) 1879 { 1880 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1881 PetscErrorCode ierr; 1882 1883 PetscFunctionBegin; 1884 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1885 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1886 PetscFunctionReturn(0); 1887 } 1888 1889 #undef __FUNCT__ 1890 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1891 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1892 { 1893 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1894 PetscErrorCode ierr; 1895 1896 PetscFunctionBegin; 1897 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1898 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1899 PetscFunctionReturn(0); 1900 } 1901 1902 #ifdef PETSC_HAVE_PBGL 1903 1904 #include <boost/parallel/mpi/bsp_process_group.hpp> 1905 #include <boost/graph/distributed/ilu_default_graph.hpp> 1906 #include <boost/graph/distributed/ilu_0_block.hpp> 1907 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1908 #include <boost/graph/distributed/petsc/interface.hpp> 1909 #include <boost/multi_array.hpp> 1910 #include <boost/parallel/distributed_property_map.hpp> 1911 1912 #undef __FUNCT__ 1913 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1914 /* 1915 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1916 */ 1917 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1918 { 1919 namespace petsc = boost::distributed::petsc; 1920 1921 namespace graph_dist = boost::graph::distributed; 1922 using boost::graph::distributed::ilu_default::process_group_type; 1923 using boost::graph::ilu_permuted; 1924 1925 PetscTruth row_identity, col_identity; 1926 PetscContainer c; 1927 PetscInt m, n, M, N; 1928 PetscErrorCode ierr; 1929 1930 PetscFunctionBegin; 1931 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1932 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1933 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1934 if (!row_identity || !col_identity) { 1935 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1936 } 1937 1938 process_group_type pg; 1939 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1940 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1941 lgraph_type& level_graph = *lgraph_p; 1942 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1943 1944 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1945 ilu_permuted(level_graph); 1946 1947 /* put together the new matrix */ 1948 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1949 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1950 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1951 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1952 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1953 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1954 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1955 1956 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 1957 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 1958 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1964 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1965 { 1966 PetscFunctionBegin; 1967 PetscFunctionReturn(0); 1968 } 1969 1970 #undef __FUNCT__ 1971
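/*
   Illustrative sketch, not part of the original source: the symbolic factorization above hands its
   boost level graph to the triangular solve below by attaching it to the factored matrix with a
   PetscContainer under the key "graph"; MatSolve_MPIAIJ() queries it back. The generic
   compose/query pattern, with hypothetical names fact, comm and ctx, is:

     PetscContainer c;
     void           *back;
     ierr = PetscContainerCreate(comm,&c);CHKERRQ(ierr);
     ierr = PetscContainerSetPointer(c,ctx);CHKERRQ(ierr);
     ierr = PetscObjectCompose((PetscObject)fact,"graph",(PetscObject)c);CHKERRQ(ierr);
     ... later, typically in the solve routine ...
     ierr = PetscObjectQuery((PetscObject)fact,"graph",(PetscObject*)&c);CHKERRQ(ierr);
     ierr = PetscContainerGetPointer(c,&back);CHKERRQ(ierr);
*/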
#define __FUNCT__ "MatSolve_MPIAIJ" 1972 /* 1973 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1974 */ 1975 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1976 { 1977 namespace graph_dist = boost::graph::distributed; 1978 1979 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1980 lgraph_type* lgraph_p; 1981 PetscContainer c; 1982 PetscErrorCode ierr; 1983 1984 PetscFunctionBegin; 1985 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1986 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1987 ierr = VecCopy(b, x);CHKERRQ(ierr); 1988 1989 PetscScalar* array_x; 1990 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1991 PetscInt sx; 1992 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1993 1994 PetscScalar* array_b; 1995 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1996 PetscInt sb; 1997 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1998 1999 lgraph_type& level_graph = *lgraph_p; 2000 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2001 2002 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2003 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2004 ref_x(array_x, boost::extents[num_vertices(graph)]); 2005 2006 typedef boost::iterator_property_map<array_ref_type::iterator, 2007 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2008 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2009 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2010 2011 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2012 2013 PetscFunctionReturn(0); 2014 } 2015 #endif 2016 2017 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2018 PetscInt nzlocal,nsends,nrecvs; 2019 PetscMPIInt *send_rank,*recv_rank; 2020 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2021 PetscScalar *sbuf_a,**rbuf_a; 2022 PetscErrorCode (*MatDestroy)(Mat); 2023 } Mat_Redundant; 2024 2025 #undef __FUNCT__ 2026 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2027 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2028 { 2029 PetscErrorCode ierr; 2030 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2031 PetscInt i; 2032 2033 PetscFunctionBegin; 2034 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2035 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2036 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2037 for (i=0; i<redund->nrecvs; i++){ 2038 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2039 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2040 } 2041 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2042 ierr = PetscFree(redund);CHKERRQ(ierr); 2043 PetscFunctionReturn(0); 2044 } 2045 2046 #undef __FUNCT__ 2047 #define __FUNCT__ "MatDestroy_MatRedundant" 2048 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2049 { 2050 PetscErrorCode ierr; 2051 PetscContainer container; 2052 Mat_Redundant *redund=PETSC_NULL; 2053 2054 PetscFunctionBegin; 2055 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2056 if (container) { 2057 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2058 } else { 2059 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2060 } 2061 A->ops->destroy = redund->MatDestroy; 2062 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2063 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2064 ierr =
PetscContainerDestroy(container);CHKERRQ(ierr); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 #undef __FUNCT__ 2069 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2070 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2071 { 2072 PetscMPIInt rank,size; 2073 MPI_Comm comm=((PetscObject)mat)->comm; 2074 PetscErrorCode ierr; 2075 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2076 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2077 PetscInt *rowrange=mat->rmap->range; 2078 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2079 Mat A=aij->A,B=aij->B,C=*matredundant; 2080 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2081 PetscScalar *sbuf_a; 2082 PetscInt nzlocal=a->nz+b->nz; 2083 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2084 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2085 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2086 MatScalar *aworkA,*aworkB; 2087 PetscScalar *vals; 2088 PetscMPIInt tag1,tag2,tag3,imdex; 2089 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2090 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2091 MPI_Status recv_status,*send_status; 2092 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2093 PetscInt **rbuf_j=PETSC_NULL; 2094 PetscScalar **rbuf_a=PETSC_NULL; 2095 Mat_Redundant *redund=PETSC_NULL; 2096 PetscContainer container; 2097 2098 PetscFunctionBegin; 2099 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2100 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2101 2102 if (reuse == MAT_REUSE_MATRIX) { 2103 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2104 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2105 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2106 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2107 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2108 if (container) { 2109 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2110 } else { 2111 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2112 } 2113 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2114 2115 nsends = redund->nsends; 2116 nrecvs = redund->nrecvs; 2117 send_rank = redund->send_rank; 2118 recv_rank = redund->recv_rank; 2119 sbuf_nz = redund->sbuf_nz; 2120 rbuf_nz = redund->rbuf_nz; 2121 sbuf_j = redund->sbuf_j; 2122 sbuf_a = redund->sbuf_a; 2123 rbuf_j = redund->rbuf_j; 2124 rbuf_a = redund->rbuf_a; 2125 } 2126 2127 if (reuse == MAT_INITIAL_MATRIX){ 2128 PetscMPIInt subrank,subsize; 2129 PetscInt nleftover,np_subcomm; 2130 /* get the destination processors' id send_rank, nsends and nrecvs */ 2131 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2132 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2133 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2134 np_subcomm = size/nsubcomm; 2135 nleftover = size - nsubcomm*np_subcomm; 2136 nsends = 0; nrecvs = 0; 2137 for (i=0; i<size; i++){ /* i=rank*/ 2138 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2139 send_rank[nsends] = i; nsends++; 2140 recv_rank[nrecvs++] = i; 2141 } 2142 } 2143 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2144 i = size-nleftover-1; 2145 j = 0; 2146 while (j < nsubcomm - nleftover){ 2147 send_rank[nsends++] = i; 2148 i--; j++; 2149 } 2150 } 2151 2152 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2153 for (i=0; i<nleftover; i++){ 2154 recv_rank[nrecvs++] = size-nleftover+i; 2155 } 2156 } 2157 2158 /* allocate sbuf_j, sbuf_a */ 2159 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2160 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2161 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2162 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2163 2164 /* copy mat's local entries into the buffers */ 2165 if (reuse == MAT_INITIAL_MATRIX){ 2166 rownz_max = 0; 2167 rptr = sbuf_j; 2168 cols = sbuf_j + rend-rstart + 1; 2169 vals = sbuf_a; 2170 rptr[0] = 0; 2171 for (i=0; i<rend-rstart; i++){ 2172 row = i + rstart; 2173 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2174 ncols = nzA + nzB; 2175 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2176 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2177 /* load the column indices for this row into cols */ 2178 lwrite = 0; 2179 for (l=0; l<nzB; l++) { 2180 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2181 vals[lwrite] = aworkB[l]; 2182 cols[lwrite++] = ctmp; 2183 } 2184 } 2185 for (l=0; l<nzA; l++){ 2186 vals[lwrite] = aworkA[l]; 2187 cols[lwrite++] = cstart + cworkA[l]; 2188 } 2189 for (l=0; l<nzB; l++) { 2190 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2191 vals[lwrite] = aworkB[l]; 2192 cols[lwrite++] = ctmp; 2193 } 2194 } 2195 vals += ncols; 2196 cols += ncols; 2197 rptr[i+1] = rptr[i] + ncols; 2198 if (rownz_max < ncols) rownz_max = ncols; 2199 } 2200 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2201 } else { /* only copy matrix values into sbuf_a */ 2202 rptr = sbuf_j; 2203 vals = sbuf_a; 2204 rptr[0] = 0; 2205 for (i=0; i<rend-rstart; i++){ 2206 row = i + rstart; 2207 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2208 ncols = nzA + nzB; 2209 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2210 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2211 lwrite = 0; 2212 for (l=0; l<nzB; l++) { 2213 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2214 } 2215 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2216 for 
(l=0; l<nzB; l++) { 2217 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2218 } 2219 vals += ncols; 2220 rptr[i+1] = rptr[i] + ncols; 2221 } 2222 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2223 2224 /* send nzlocal to others, and recv other's nzlocal */ 2225 /*--------------------------------------------------*/ 2226 if (reuse == MAT_INITIAL_MATRIX){ 2227 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2228 s_waits2 = s_waits3 + nsends; 2229 s_waits1 = s_waits2 + nsends; 2230 r_waits1 = s_waits1 + nsends; 2231 r_waits2 = r_waits1 + nrecvs; 2232 r_waits3 = r_waits2 + nrecvs; 2233 } else { 2234 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2235 r_waits3 = s_waits3 + nsends; 2236 } 2237 2238 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2239 if (reuse == MAT_INITIAL_MATRIX){ 2240 /* get new tags to keep the communication clean */ 2241 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2242 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2243 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2244 2245 /* post receives of other's nzlocal */ 2246 for (i=0; i<nrecvs; i++){ 2247 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2248 } 2249 /* send nzlocal to others */ 2250 for (i=0; i<nsends; i++){ 2251 sbuf_nz[i] = nzlocal; 2252 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2253 } 2254 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2255 count = nrecvs; 2256 while (count) { 2257 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2258 recv_rank[imdex] = recv_status.MPI_SOURCE; 2259 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2260 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2261 2262 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2263 rbuf_nz[imdex] += i + 2; 2264 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2265 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2266 count--; 2267 } 2268 /* wait on sends of nzlocal */ 2269 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2270 /* send mat->i,j to others, and recv from other's */ 2271 /*------------------------------------------------*/ 2272 for (i=0; i<nsends; i++){ 2273 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2274 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2275 } 2276 /* wait on receives of mat->i,j */ 2277 /*------------------------------*/ 2278 count = nrecvs; 2279 while (count) { 2280 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2281 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2282 count--; 2283 } 2284 /* wait on sends of mat->i,j */ 2285 /*---------------------------*/ 2286 if (nsends) { 2287 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2288 } 2289 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2290 2291 /* post receives, send and receive mat->a */ 2292 
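/*
   Illustrative sketch, not part of the original source, of the nonblocking exchange pattern this
   routine uses for the index and value messages: post all receives first, start all sends, then
   drain completions with MPI_Waitany() so each buffer can be unpacked as soon as it arrives. The
   names rbuf, sbuf, len, slen, src, dst, rwait, swait and sstat below are hypothetical stand-ins
   for the corresponding arrays above; comm, tag3, nrecvs, nsends, count, imdex and recv_status
   are the variables of this function:

     for (i=0; i<nrecvs; i++) {ierr = MPI_Irecv(rbuf[i],len[i],MPIU_SCALAR,src[i],tag3,comm,rwait+i);CHKERRQ(ierr);}
     for (i=0; i<nsends; i++) {ierr = MPI_Isend(sbuf,slen,MPIU_SCALAR,dst[i],tag3,comm,swait+i);CHKERRQ(ierr);}
     count = nrecvs;
     while (count--) {
       ierr = MPI_Waitany(nrecvs,rwait,&imdex,&recv_status);CHKERRQ(ierr);
       ... rbuf[imdex] is now complete and can be unpacked ...
     }
     if (nsends) {ierr = MPI_Waitall(nsends,swait,sstat);CHKERRQ(ierr);}
*/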
/*----------------------------------------*/ 2293 for (imdex=0; imdex<nrecvs; imdex++) { 2294 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2295 } 2296 for (i=0; i<nsends; i++){ 2297 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2298 } 2299 count = nrecvs; 2300 while (count) { 2301 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2302 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2303 count--; 2304 } 2305 if (nsends) { 2306 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2307 } 2308 2309 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2310 2311 /* create redundant matrix */ 2312 /*-------------------------*/ 2313 if (reuse == MAT_INITIAL_MATRIX){ 2314 /* compute rownz_max for preallocation */ 2315 for (imdex=0; imdex<nrecvs; imdex++){ 2316 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2317 rptr = rbuf_j[imdex]; 2318 for (i=0; i<j; i++){ 2319 ncols = rptr[i+1] - rptr[i]; 2320 if (rownz_max < ncols) rownz_max = ncols; 2321 } 2322 } 2323 2324 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2325 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2326 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2327 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2328 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2329 } else { 2330 C = *matredundant; 2331 } 2332 2333 /* insert local matrix entries */ 2334 rptr = sbuf_j; 2335 cols = sbuf_j + rend-rstart + 1; 2336 vals = sbuf_a; 2337 for (i=0; i<rend-rstart; i++){ 2338 row = i + rstart; 2339 ncols = rptr[i+1] - rptr[i]; 2340 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2341 vals += ncols; 2342 cols += ncols; 2343 } 2344 /* insert received matrix entries */ 2345 for (imdex=0; imdex<nrecvs; imdex++){ 2346 rstart = rowrange[recv_rank[imdex]]; 2347 rend = rowrange[recv_rank[imdex]+1]; 2348 rptr = rbuf_j[imdex]; 2349 cols = rbuf_j[imdex] + rend-rstart + 1; 2350 vals = rbuf_a[imdex]; 2351 for (i=0; i<rend-rstart; i++){ 2352 row = i + rstart; 2353 ncols = rptr[i+1] - rptr[i]; 2354 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2355 vals += ncols; 2356 cols += ncols; 2357 } 2358 } 2359 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2360 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2361 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2362 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2363 if (reuse == MAT_INITIAL_MATRIX){ 2364 PetscContainer container; 2365 *matredundant = C; 2366 /* create a supporting struct and attach it to C for reuse */ 2367 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2368 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2369 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2370 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2371 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2372 2373 redund->nzlocal = nzlocal; 2374 redund->nsends = nsends; 2375 redund->nrecvs = nrecvs; 2376 redund->send_rank = send_rank; 2377 redund->recv_rank = recv_rank; 
2378 redund->sbuf_nz = sbuf_nz; 2379 redund->rbuf_nz = rbuf_nz; 2380 redund->sbuf_j = sbuf_j; 2381 redund->sbuf_a = sbuf_a; 2382 redund->rbuf_j = rbuf_j; 2383 redund->rbuf_a = rbuf_a; 2384 2385 redund->MatDestroy = C->ops->destroy; 2386 C->ops->destroy = MatDestroy_MatRedundant; 2387 } 2388 PetscFunctionReturn(0); 2389 } 2390 2391 #undef __FUNCT__ 2392 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2393 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2394 { 2395 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2396 PetscErrorCode ierr; 2397 PetscInt i,*idxb = 0; 2398 PetscScalar *va,*vb; 2399 Vec vtmp; 2400 2401 PetscFunctionBegin; 2402 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2403 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2404 if (idx) { 2405 for (i=0; i<A->rmap->n; i++) { 2406 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2407 } 2408 } 2409 2410 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2411 if (idx) { 2412 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2413 } 2414 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2415 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2416 2417 for (i=0; i<A->rmap->n; i++){ 2418 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2419 va[i] = vb[i]; 2420 if (idx) idx[i] = a->garray[idxb[i]]; 2421 } 2422 } 2423 2424 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2425 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2426 if (idxb) { 2427 ierr = PetscFree(idxb);CHKERRQ(ierr); 2428 } 2429 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2430 PetscFunctionReturn(0); 2431 } 2432 2433 #undef __FUNCT__ 2434 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2435 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2436 { 2437 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2438 PetscErrorCode ierr; 2439 PetscInt i,*idxb = 0; 2440 PetscScalar *va,*vb; 2441 Vec vtmp; 2442 2443 PetscFunctionBegin; 2444 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2445 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2446 if (idx) { 2447 for (i=0; i<A->cmap->n; i++) { 2448 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2449 } 2450 } 2451 2452 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2453 if (idx) { 2454 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2455 } 2456 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2457 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2458 2459 for (i=0; i<A->rmap->n; i++){ 2460 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2461 va[i] = vb[i]; 2462 if (idx) idx[i] = a->garray[idxb[i]]; 2463 } 2464 } 2465 2466 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2467 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2468 if (idxb) { 2469 ierr = PetscFree(idxb);CHKERRQ(ierr); 2470 } 2471 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2472 PetscFunctionReturn(0); 2473 } 2474 2475 #undef __FUNCT__ 2476 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2477 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2478 { 2479 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2480 PetscInt n = A->rmap->n; 2481 PetscInt cstart = A->cmap->rstart; 2482 PetscInt *cmap = mat->garray; 2483 PetscInt *diagIdx, *offdiagIdx; 2484 Vec diagV, offdiagV; 2485 PetscScalar *a, *diagA, *offdiagA; 2486 PetscInt r; 2487 PetscErrorCode ierr; 2488 2489 PetscFunctionBegin; 2490 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2491 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2492 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2493 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2494 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2495 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2496 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2497 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2498 for(r = 0; r < n; ++r) { 2499 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2500 a[r] = diagA[r]; 2501 idx[r] = cstart + diagIdx[r]; 2502 } else { 2503 a[r] = offdiagA[r]; 2504 idx[r] = cmap[offdiagIdx[r]]; 2505 } 2506 } 2507 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2508 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2509 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2510 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2511 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2512 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2513 PetscFunctionReturn(0); 2514 } 2515 2516 #undef __FUNCT__ 2517 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2518 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2519 { 2520 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2521 PetscInt n = A->rmap->n; 2522 PetscInt cstart = A->cmap->rstart; 2523 PetscInt *cmap = mat->garray; 2524 PetscInt *diagIdx, *offdiagIdx; 2525 Vec diagV, offdiagV; 2526 PetscScalar *a, *diagA, *offdiagA; 2527 PetscInt r; 2528 PetscErrorCode ierr; 2529 2530 PetscFunctionBegin; 2531 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2532 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2533 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2534 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2535 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2536 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2537 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2538 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2539 for(r = 0; r < n; ++r) { 2540 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2541 a[r] = diagA[r]; 2542 idx[r] = cstart + diagIdx[r]; 2543 } else { 2544 a[r] = offdiagA[r]; 2545 idx[r] = cmap[offdiagIdx[r]]; 2546 } 2547 } 2548 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2549 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2550 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2551 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2552 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2553 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2554 PetscFunctionReturn(0); 2555 } 2556 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2559 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2560 { 2561 PetscErrorCode ierr; 2562 Mat *dummy; 2563 2564 PetscFunctionBegin; 2565 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2566 *newmat = *dummy; 2567 ierr = PetscFree(dummy);CHKERRQ(ierr); 2568 PetscFunctionReturn(0); 2569 } 2570 2571 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2572 /* -------------------------------------------------------------------*/ 2573 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2574 MatGetRow_MPIAIJ, 2575 MatRestoreRow_MPIAIJ, 2576 MatMult_MPIAIJ, 2577 /* 4*/ MatMultAdd_MPIAIJ, 2578 MatMultTranspose_MPIAIJ, 2579 MatMultTransposeAdd_MPIAIJ, 2580 #ifdef PETSC_HAVE_PBGL 2581 MatSolve_MPIAIJ, 2582 #else 2583 0, 2584 #endif 2585 0, 2586 0, 2587 /*10*/ 0, 2588 0, 2589 0, 2590 
MatSOR_MPIAIJ, 2591 MatTranspose_MPIAIJ, 2592 /*15*/ MatGetInfo_MPIAIJ, 2593 MatEqual_MPIAIJ, 2594 MatGetDiagonal_MPIAIJ, 2595 MatDiagonalScale_MPIAIJ, 2596 MatNorm_MPIAIJ, 2597 /*20*/ MatAssemblyBegin_MPIAIJ, 2598 MatAssemblyEnd_MPIAIJ, 2599 MatSetOption_MPIAIJ, 2600 MatZeroEntries_MPIAIJ, 2601 /*24*/ MatZeroRows_MPIAIJ, 2602 0, 2603 #ifdef PETSC_HAVE_PBGL 2604 0, 2605 #else 2606 0, 2607 #endif 2608 0, 2609 0, 2610 /*29*/ MatSetUpPreallocation_MPIAIJ, 2611 #ifdef PETSC_HAVE_PBGL 2612 0, 2613 #else 2614 0, 2615 #endif 2616 0, 2617 0, 2618 0, 2619 /*34*/ MatDuplicate_MPIAIJ, 2620 0, 2621 0, 2622 0, 2623 0, 2624 /*39*/ MatAXPY_MPIAIJ, 2625 MatGetSubMatrices_MPIAIJ, 2626 MatIncreaseOverlap_MPIAIJ, 2627 MatGetValues_MPIAIJ, 2628 MatCopy_MPIAIJ, 2629 /*44*/ MatGetRowMax_MPIAIJ, 2630 MatScale_MPIAIJ, 2631 0, 2632 0, 2633 0, 2634 /*49*/ MatSetBlockSize_MPIAIJ, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*54*/ MatFDColoringCreate_MPIAIJ, 2640 0, 2641 MatSetUnfactored_MPIAIJ, 2642 MatPermute_MPIAIJ, 2643 0, 2644 /*59*/ MatGetSubMatrix_MPIAIJ, 2645 MatDestroy_MPIAIJ, 2646 MatView_MPIAIJ, 2647 0, 2648 0, 2649 /*64*/ 0, 2650 0, 2651 0, 2652 0, 2653 0, 2654 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2655 MatGetRowMinAbs_MPIAIJ, 2656 0, 2657 MatSetColoring_MPIAIJ, 2658 #if defined(PETSC_HAVE_ADIC) 2659 MatSetValuesAdic_MPIAIJ, 2660 #else 2661 0, 2662 #endif 2663 MatSetValuesAdifor_MPIAIJ, 2664 /*75*/ MatFDColoringApply_AIJ, 2665 0, 2666 0, 2667 0, 2668 0, 2669 /*80*/ 0, 2670 0, 2671 0, 2672 /*83*/ 0, 2673 0, 2674 0, 2675 0, 2676 0, 2677 0, 2678 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2679 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2680 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2681 MatPtAP_Basic, 2682 MatPtAPSymbolic_MPIAIJ, 2683 /*94*/ MatPtAPNumeric_MPIAIJ, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /*99*/ 0, 2689 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2690 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2691 MatConjugate_MPIAIJ, 2692 0, 2693 /*104*/MatSetValuesRow_MPIAIJ, 2694 MatRealPart_MPIAIJ, 2695 MatImaginaryPart_MPIAIJ, 2696 0, 2697 0, 2698 /*109*/0, 2699 MatGetRedundantMatrix_MPIAIJ, 2700 MatGetRowMin_MPIAIJ, 2701 0, 2702 0, 2703 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2704 0, 2705 0, 2706 0, 2707 0, 2708 /*119*/0, 2709 0, 2710 0, 2711 0, 2712 MatLoadnew_MPIAIJ 2713 }; 2714 2715 /* ----------------------------------------------------------------------------------------*/ 2716 2717 EXTERN_C_BEGIN 2718 #undef __FUNCT__ 2719 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2720 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2721 { 2722 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2723 PetscErrorCode ierr; 2724 2725 PetscFunctionBegin; 2726 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2727 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2728 PetscFunctionReturn(0); 2729 } 2730 EXTERN_C_END 2731 2732 EXTERN_C_BEGIN 2733 #undef __FUNCT__ 2734 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2735 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2736 { 2737 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2738 PetscErrorCode ierr; 2739 2740 PetscFunctionBegin; 2741 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2742 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2743 PetscFunctionReturn(0); 2744 } 2745 EXTERN_C_END 2746 2747 #include "petscpc.h" 2748 EXTERN_C_BEGIN 2749 #undef __FUNCT__ 2750 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2751 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2752 { 2753 Mat_MPIAIJ *b; 2754 PetscErrorCode ierr; 2755 
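/*
   Illustrative usage sketch, not part of the original source: callers reach this routine through
   MatMPIAIJSetPreallocation(), giving either uniform per-row counts (d_nz/o_nz) or exact per-row
   arrays (d_nnz/o_nnz) for the diagonal and off-diagonal blocks. For example, for a hypothetical
   M-by-N parallel matrix with at most 3 nonzeros per row inside the diagonal block and at most 2
   outside it:

     Mat C;
     ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr);
     ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(C,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(C,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/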
PetscInt i; 2756 2757 PetscFunctionBegin; 2758 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2759 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2760 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2761 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2762 2763 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2764 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2765 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2766 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2767 if (d_nnz) { 2768 for (i=0; i<B->rmap->n; i++) { 2769 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2770 } 2771 } 2772 if (o_nnz) { 2773 for (i=0; i<B->rmap->n; i++) { 2774 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2775 } 2776 } 2777 b = (Mat_MPIAIJ*)B->data; 2778 2779 if (!B->preallocated) { 2780 /* Explicitly create 2 MATSEQAIJ matrices. */ 2781 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2782 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2783 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2784 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2785 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2786 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2787 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2788 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2789 } 2790 2791 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2792 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2793 B->preallocated = PETSC_TRUE; 2794 PetscFunctionReturn(0); 2795 } 2796 EXTERN_C_END 2797 2798 #undef __FUNCT__ 2799 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2800 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2801 { 2802 Mat mat; 2803 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2804 PetscErrorCode ierr; 2805 2806 PetscFunctionBegin; 2807 *newmat = 0; 2808 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2809 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2810 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2811 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2812 a = (Mat_MPIAIJ*)mat->data; 2813 2814 mat->factortype = matin->factortype; 2815 mat->rmap->bs = matin->rmap->bs; 2816 mat->assembled = PETSC_TRUE; 2817 mat->insertmode = NOT_SET_VALUES; 2818 mat->preallocated = PETSC_TRUE; 2819 2820 a->size = oldmat->size; 2821 a->rank = oldmat->rank; 2822 a->donotstash = oldmat->donotstash; 2823 a->roworiented = oldmat->roworiented; 2824 a->rowindices = 0; 2825 a->rowvalues = 0; 2826 a->getrowactive = PETSC_FALSE; 2827 2828 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2829 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2830 2831 if (oldmat->colmap) { 2832 #if defined (PETSC_USE_CTABLE) 2833 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2834 #else 2835 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2836 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2837 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2838 #endif 2839 } else a->colmap = 0; 2840 if (oldmat->garray) { 2841 PetscInt len; 2842 len = oldmat->B->cmap->n; 2843 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2844 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2845 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2846 } else a->garray = 0; 2847 2848 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2849 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2850 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2851 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2852 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2853 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2854 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2855 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2856 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2857 *newmat = mat; 2858 PetscFunctionReturn(0); 2859 } 2860 2861 #undef __FUNCT__ 2862 #define __FUNCT__ "MatLoadnew_MPIAIJ" 2863 PetscErrorCode MatLoadnew_MPIAIJ(PetscViewer viewer, Mat newMat) 2864 { 2865 PetscScalar *vals,*svals; 2866 MPI_Comm comm = ((PetscObject)viewer)->comm; 2867 MPI_Status status; 2868 PetscErrorCode ierr; 2869 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2870 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 2871 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2872 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2873 PetscInt cend,cstart,n,*rowners,sizesset=1; 2874 int fd; 2875 2876 PetscFunctionBegin; 2877 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2878 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2879 if (!rank) { 2880 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2881 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2882 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2883 } 2884 2885 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 2886 2887 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2888 M = header[1]; N = header[2]; 2889 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2890 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 2891 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 2892 2893 /* If global sizes are set, check if they are consistent with that given in the file */ 2894 if (sizesset) { 2895 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 2896 } 2897 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows); 2898 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2899 2900 /* determine ownership of all rows */ 2901 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 2902 else m = newMat->rmap->n; /* Set by user */ 2903 2904 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2905 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2906 2907 /* First 
process needs enough room for process with most rows */ 2908 if (!rank) { 2909 mmax = rowners[1]; 2910 for (i=2; i<size; i++) { 2911 mmax = PetscMax(mmax,rowners[i]); 2912 } 2913 } else mmax = m; 2914 2915 rowners[0] = 0; 2916 for (i=2; i<=size; i++) { 2917 rowners[i] += rowners[i-1]; 2918 } 2919 rstart = rowners[rank]; 2920 rend = rowners[rank+1]; 2921 2922 /* distribute row lengths to all processors */ 2923 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2924 if (!rank) { 2925 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2926 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2927 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2928 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2929 for (j=0; j<m; j++) { 2930 procsnz[0] += ourlens[j]; 2931 } 2932 for (i=1; i<size; i++) { 2933 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2934 /* calculate the number of nonzeros on each processor */ 2935 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2936 procsnz[i] += rowlengths[j]; 2937 } 2938 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2939 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2940 } 2941 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2942 } else { 2943 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2944 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2945 } 2946 2947 if (!rank) { 2948 /* determine max buffer needed and allocate it */ 2949 maxnz = 0; 2950 for (i=0; i<size; i++) { 2951 maxnz = PetscMax(maxnz,procsnz[i]); 2952 } 2953 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2954 2955 /* read in my part of the matrix column indices */ 2956 nz = procsnz[0]; 2957 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2958 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2959 2960 /* read in every one elses and ship off */ 2961 for (i=1; i<size; i++) { 2962 nz = procsnz[i]; 2963 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2964 mpicnt = PetscMPIIntCast(nz); 2965 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2966 } 2967 ierr = PetscFree(cols);CHKERRQ(ierr); 2968 } else { 2969 /* determine buffer space needed for message */ 2970 nz = 0; 2971 for (i=0; i<m; i++) { 2972 nz += ourlens[i]; 2973 } 2974 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2975 2976 /* receive message of column indices*/ 2977 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2978 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2979 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2980 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 2981 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 2982 else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 2983 } 2984 2985 /* determine column ownership if matrix is not square */ 2986 if (N != M) { 2987 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2988 else n = newMat->cmap->n; 2989 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2990 cstart = cend - n; 2991 } else { 2992 cstart = rstart; 2993 cend = rend; 2994 n = cend - cstart; 2995 } 2996 2997 /* loop over local rows, 
determining number of off diagonal entries */ 2998 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2999 jj = 0; 3000 for (i=0; i<m; i++) { 3001 for (j=0; j<ourlens[i]; j++) { 3002 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3003 jj++; 3004 } 3005 } 3006 3007 for (i=0; i<m; i++) { 3008 ourlens[i] -= offlens[i]; 3009 } 3010 if (!sizesset) { 3011 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3012 } 3013 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3014 3015 for (i=0; i<m; i++) { 3016 ourlens[i] += offlens[i]; 3017 } 3018 3019 if (!rank) { 3020 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3021 3022 /* read in my part of the matrix numerical values */ 3023 nz = procsnz[0]; 3024 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3025 3026 /* insert into matrix */ 3027 jj = rstart; 3028 smycols = mycols; 3029 svals = vals; 3030 for (i=0; i<m; i++) { 3031 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3032 smycols += ourlens[i]; 3033 svals += ourlens[i]; 3034 jj++; 3035 } 3036 3037 /* read in other processors and ship out */ 3038 for (i=1; i<size; i++) { 3039 nz = procsnz[i]; 3040 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3041 mpicnt = PetscMPIIntCast(nz); 3042 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3043 } 3044 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3045 } else { 3046 /* receive numeric values */ 3047 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3048 3049 /* receive message of values*/ 3050 mpicnt = PetscMPIIntCast(nz); 3051 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm,&status);CHKERRQ(ierr); 3052 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3053 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 3054 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 3055 else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 3056 3057 /* insert into matrix */ 3058 jj = rstart; 3059 smycols = mycols; 3060 svals = vals; 3061 for (i=0; i<m; i++) { 3062 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3063 smycols += ourlens[i]; 3064 svals += ourlens[i]; 3065 jj++; 3066 } 3067 } 3068 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3069 ierr = PetscFree(vals);CHKERRQ(ierr); 3070 ierr = PetscFree(mycols);CHKERRQ(ierr); 3071 ierr = PetscFree(rowners);CHKERRQ(ierr); 3072 3073 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3074 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3075 PetscFunctionReturn(0); 3076 } 3077 3078 #undef __FUNCT__ 3079 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3080 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3081 { 3082 PetscErrorCode ierr; 3083 IS iscol_local; 3084 PetscInt csize; 3085 3086 PetscFunctionBegin; 3087 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3088 if (call == MAT_REUSE_MATRIX) { 3089 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3090 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in 
was not used before, cannot reuse"); 3091 } else { 3092 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3093 } 3094 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3095 if (call == MAT_INITIAL_MATRIX) { 3096 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3097 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3098 } 3099 PetscFunctionReturn(0); 3100 } 3101 3102 #undef __FUNCT__ 3103 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3104 /* 3105 Not great since it makes two copies of the submatrix, first an SeqAIJ 3106 in local and then by concatenating the local matrices the end result. 3107 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3108 3109 Note: This requires a sequential iscol with all indices. 3110 */ 3111 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3112 { 3113 PetscErrorCode ierr; 3114 PetscMPIInt rank,size; 3115 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3116 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3117 Mat *local,M,Mreuse; 3118 MatScalar *vwork,*aa; 3119 MPI_Comm comm = ((PetscObject)mat)->comm; 3120 Mat_SeqAIJ *aij; 3121 3122 3123 PetscFunctionBegin; 3124 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3125 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3126 3127 if (call == MAT_REUSE_MATRIX) { 3128 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3129 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3130 local = &Mreuse; 3131 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3132 } else { 3133 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3134 Mreuse = *local; 3135 ierr = PetscFree(local);CHKERRQ(ierr); 3136 } 3137 3138 /* 3139 m - number of local rows 3140 n - number of columns (same on all processors) 3141 rstart - first row in new global matrix generated 3142 */ 3143 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3144 if (call == MAT_INITIAL_MATRIX) { 3145 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3146 ii = aij->i; 3147 jj = aij->j; 3148 3149 /* 3150 Determine the number of non-zeros in the diagonal and off-diagonal 3151 portions of the matrix in order to do correct preallocation 3152 */ 3153 3154 /* first get start and end of "diagonal" columns */ 3155 if (csize == PETSC_DECIDE) { 3156 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3157 if (mglobal == n) { /* square matrix */ 3158 nlocal = m; 3159 } else { 3160 nlocal = n/size + ((n % size) > rank); 3161 } 3162 } else { 3163 nlocal = csize; 3164 } 3165 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3166 rstart = rend - nlocal; 3167 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3168 3169 /* next, compute all the lengths */ 3170 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3171 olens = dlens + m; 3172 for (i=0; i<m; i++) { 3173 jend = ii[i+1] - ii[i]; 3174 olen = 0; 3175 dlen = 0; 3176 for (j=0; j<jend; j++) { 3177 if (*jj < rstart || *jj >= rend) olen++; 3178 else dlen++; 3179 jj++; 3180 } 3181 olens[i] = olen; 3182 dlens[i] = dlen; 3183 } 3184 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3185 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3186 ierr = 
MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3187 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3188 ierr = PetscFree(dlens);CHKERRQ(ierr);
3189 } else {
3190 PetscInt ml,nl;
3191
3192 M = *newmat;
3193 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3194 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3195 ierr = MatZeroEntries(M);CHKERRQ(ierr);
3196 /*
3197 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3198 rather than the slower MatSetValues().
3199 */
3200 M->was_assembled = PETSC_TRUE;
3201 M->assembled = PETSC_FALSE;
3202 }
3203 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3204 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3205 ii = aij->i;
3206 jj = aij->j;
3207 aa = aij->a;
3208 for (i=0; i<m; i++) {
3209 row = rstart + i;
3210 nz = ii[i+1] - ii[i];
3211 cwork = jj; jj += nz;
3212 vwork = aa; aa += nz;
3213 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3214 }
3215
3216 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3217 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3218 *newmat = M;
3219
3220 /* save submatrix used in processor for next request */
3221 if (call == MAT_INITIAL_MATRIX) {
3222 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3223 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3224 }
3225
3226 PetscFunctionReturn(0);
3227 }
3228
3229 EXTERN_C_BEGIN
3230 #undef __FUNCT__
3231 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3232 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3233 {
3234 PetscInt m,cstart,cend,j,nnz,i,d;
3235 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3236 const PetscInt *JJ;
3237 PetscScalar *values;
3238 PetscErrorCode ierr;
3239
3240 PetscFunctionBegin;
3241 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0; it is %D",Ii[0]);
3242
3243 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3244 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3245 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3246 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3247 m = B->rmap->n;
3248 cstart = B->cmap->rstart;
3249 cend = B->cmap->rend;
3250 rstart = B->rmap->rstart;
3251
3252 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3253
3254 #if defined(PETSC_USE_DEBUG)
3255 for (i=0; i<m; i++) {
3256 nnz = Ii[i+1] - Ii[i];
3257 JJ = J + Ii[i];
3258 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3259 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
3260 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3261 }
3262 #endif
3263
3264 for (i=0; i<m; i++) {
3265 nnz = Ii[i+1] - Ii[i];
3266 JJ = J + Ii[i];
3267 nnz_max = PetscMax(nnz_max,nnz);
3268 d = 0;
3269 for (j=0; j<nnz; j++) {
3270 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3271 }
3272 d_nnz[i] = d;
3273 o_nnz[i] = nnz - d;
3274 }
3275 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3276 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3277
3278 if (v) values = (PetscScalar*)v;
3279 else {
3280 ierr =
PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3281 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3282 } 3283 3284 for (i=0; i<m; i++) { 3285 ii = i + rstart; 3286 nnz = Ii[i+1]- Ii[i]; 3287 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3288 } 3289 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3290 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3291 3292 if (!v) { 3293 ierr = PetscFree(values);CHKERRQ(ierr); 3294 } 3295 PetscFunctionReturn(0); 3296 } 3297 EXTERN_C_END 3298 3299 #undef __FUNCT__ 3300 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3301 /*@ 3302 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3303 (the default parallel PETSc format). 3304 3305 Collective on MPI_Comm 3306 3307 Input Parameters: 3308 + B - the matrix 3309 . i - the indices into j for the start of each local row (starts with zero) 3310 . j - the column indices for each local row (starts with zero) 3311 - v - optional values in the matrix 3312 3313 Level: developer 3314 3315 Notes: 3316 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3317 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3318 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3319 3320 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3321 3322 The format which is used for the sparse matrix input, is equivalent to a 3323 row-major ordering.. i.e for the following matrix, the input data expected is 3324 as shown: 3325 3326 1 0 0 3327 2 0 3 P0 3328 ------- 3329 4 5 6 P1 3330 3331 Process0 [P0]: rows_owned=[0,1] 3332 i = {0,1,3} [size = nrow+1 = 2+1] 3333 j = {0,0,2} [size = nz = 6] 3334 v = {1,2,3} [size = nz = 6] 3335 3336 Process1 [P1]: rows_owned=[2] 3337 i = {0,3} [size = nrow+1 = 1+1] 3338 j = {0,1,2} [size = nz = 6] 3339 v = {4,5,6} [size = nz = 6] 3340 3341 .keywords: matrix, aij, compressed row, sparse, parallel 3342 3343 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3344 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3345 @*/ 3346 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3347 { 3348 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3349 3350 PetscFunctionBegin; 3351 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3352 if (f) { 3353 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3354 } 3355 PetscFunctionReturn(0); 3356 } 3357 3358 #undef __FUNCT__ 3359 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3360 /*@C 3361 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3362 (the default parallel PETSc format). For good matrix assembly performance 3363 the user should preallocate the matrix storage by setting the parameters 3364 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3365 performance can be increased by more than a factor of 50. 3366 3367 Collective on MPI_Comm 3368 3369 Input Parameters: 3370 + A - the matrix 3371 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3372 (same value is used for all local rows) 3373 . 
d_nnz - array containing the number of nonzeros in the various rows of the 3374 DIAGONAL portion of the local submatrix (possibly different for each row) 3375 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3376 The size of this array is equal to the number of local rows, i.e 'm'. 3377 You must leave room for the diagonal entry even if it is zero. 3378 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3379 submatrix (same value is used for all local rows). 3380 - o_nnz - array containing the number of nonzeros in the various rows of the 3381 OFF-DIAGONAL portion of the local submatrix (possibly different for 3382 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3383 structure. The size of this array is equal to the number 3384 of local rows, i.e 'm'. 3385 3386 If the *_nnz parameter is given then the *_nz parameter is ignored 3387 3388 The AIJ format (also called the Yale sparse matrix format or 3389 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3390 storage. The stored row and column indices begin with zero. See the users manual for details. 3391 3392 The parallel matrix is partitioned such that the first m0 rows belong to 3393 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3394 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3395 3396 The DIAGONAL portion of the local submatrix of a processor can be defined 3397 as the submatrix which is obtained by extraction the part corresponding 3398 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3399 first row that belongs to the processor, and r2 is the last row belonging 3400 to the this processor. This is a square mxm matrix. The remaining portion 3401 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3402 3403 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3404 3405 You can call MatGetInfo() to get information on how effective the preallocation was; 3406 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3407 You can also run with the option -info and look for messages with the string 3408 malloc in them to see if additional memory allocation was needed. 3409 3410 Example usage: 3411 3412 Consider the following 8x8 matrix with 34 non-zero values, that is 3413 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3414 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3415 as follows: 3416 3417 .vb 3418 1 2 0 | 0 3 0 | 0 4 3419 Proc0 0 5 6 | 7 0 0 | 8 0 3420 9 0 10 | 11 0 0 | 12 0 3421 ------------------------------------- 3422 13 0 14 | 15 16 17 | 0 0 3423 Proc1 0 18 0 | 19 20 21 | 0 0 3424 0 0 0 | 22 23 0 | 24 0 3425 ------------------------------------- 3426 Proc2 25 26 27 | 0 0 28 | 29 0 3427 30 0 0 | 31 32 33 | 0 34 3428 .ve 3429 3430 This can be represented as a collection of submatrices as: 3431 3432 .vb 3433 A B C 3434 D E F 3435 G H I 3436 .ve 3437 3438 Where the submatrices A,B,C are owned by proc0, D,E,F are 3439 owned by proc1, G,H,I are owned by proc2. 3440 3441 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3442 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3443 The 'M','N' parameters are 8,8, and have the same values on all procs. 3444 3445 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3446 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3447 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3448 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3449 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3450 matrix, and [DF] as another SeqAIJ matrix.
3451
3452 When the d_nz, o_nz parameters are specified, d_nz storage elements are
3453 allocated for every row of the local diagonal submatrix, and o_nz
3454 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3455 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3456 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3457 In this case, the values of d_nz,o_nz are:
3458 .vb
3459 proc0 : d_nz = 2, o_nz = 2
3460 proc1 : d_nz = 3, o_nz = 2
3461 proc2 : d_nz = 1, o_nz = 4
3462 .ve
3463 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3464 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3465 for proc2, i.e. we are using 12+15+10=37 storage locations to store
3466 34 values.
3467
3468 When the d_nnz, o_nnz parameters are specified, the storage is specified
3469 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3470 In the above case the values for d_nnz,o_nnz are:
3471 .vb
3472 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3473 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3474 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3475 .ve
3476 Here the space allocated is the sum of all the above values, i.e. 34, and
3477 hence the preallocation is exact.
3478
3479 Level: intermediate
3480
3481 .keywords: matrix, aij, compressed row, sparse, parallel
3482
3483 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3484 MPIAIJ, MatGetInfo()
3485 @*/
3486 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3487 {
3488 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3489
3490 PetscFunctionBegin;
3491 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3492 if (f) {
3493 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3494 }
3495 PetscFunctionReturn(0);
3496 }
3497
3498 #undef __FUNCT__
3499 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3500 /*@
3501 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the
3502 local rows in standard CSR format.
3503
3504 Collective on MPI_Comm
3505
3506 Input Parameters:
3507 + comm - MPI communicator
3508 . m - number of local rows (cannot be PETSC_DECIDE)
3509 . n - This value should be the same as the local size used in creating the
3510 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3511 calculated if N is given) For square matrices n is almost always m.
3512 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3513 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3514 . i - the indices into j for the start of each local row (i[0] must be 0)
3515 . j - column indices
3516 - a - matrix values
3517
3518 Output Parameter:
3519 . mat - the matrix
3520
3521 Level: intermediate
3522
3523 Notes:
3524 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3525 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3526 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3527
3528 The i and j indices are 0-based, and the i indices are offsets into the local j array.
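   As a minimal call sketch (assuming comm and ierr are declared as usual), the process
   owning the first two rows of the 3x3 layout shown below could build the matrix as
   follows; i[] holds the offsets into j[] for the two local rows, j[] the global column
   indices, and v[] the values in the same order as j[]:

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     Mat         A;

     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve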
3529 3530 The format which is used for the sparse matrix input, is equivalent to a 3531 row-major ordering.. i.e for the following matrix, the input data expected is 3532 as shown: 3533 3534 1 0 0 3535 2 0 3 P0 3536 ------- 3537 4 5 6 P1 3538 3539 Process0 [P0]: rows_owned=[0,1] 3540 i = {0,1,3} [size = nrow+1 = 2+1] 3541 j = {0,0,2} [size = nz = 6] 3542 v = {1,2,3} [size = nz = 6] 3543 3544 Process1 [P1]: rows_owned=[2] 3545 i = {0,3} [size = nrow+1 = 1+1] 3546 j = {0,1,2} [size = nz = 6] 3547 v = {4,5,6} [size = nz = 6] 3548 3549 .keywords: matrix, aij, compressed row, sparse, parallel 3550 3551 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3552 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3553 @*/ 3554 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3555 { 3556 PetscErrorCode ierr; 3557 3558 PetscFunctionBegin; 3559 if (i[0]) { 3560 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3561 } 3562 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3563 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3564 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3565 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3566 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3567 PetscFunctionReturn(0); 3568 } 3569 3570 #undef __FUNCT__ 3571 #define __FUNCT__ "MatCreateMPIAIJ" 3572 /*@C 3573 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3574 (the default parallel PETSc format). For good matrix assembly performance 3575 the user should preallocate the matrix storage by setting the parameters 3576 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3577 performance can be increased by more than a factor of 50. 3578 3579 Collective on MPI_Comm 3580 3581 Input Parameters: 3582 + comm - MPI communicator 3583 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3584 This value should be the same as the local size used in creating the 3585 y vector for the matrix-vector product y = Ax. 3586 . n - This value should be the same as the local size used in creating the 3587 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3588 calculated if N is given) For square matrices n is almost always m. 3589 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3590 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3591 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3592 (same value is used for all local rows) 3593 . d_nnz - array containing the number of nonzeros in the various rows of the 3594 DIAGONAL portion of the local submatrix (possibly different for each row) 3595 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3596 The size of this array is equal to the number of local rows, i.e 'm'. 3597 You must leave room for the diagonal entry even if it is zero. 3598 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3599 submatrix (same value is used for all local rows). 
3600 - o_nnz - array containing the number of nonzeros in the various rows of the
3601 OFF-DIAGONAL portion of the local submatrix (possibly different for
3602 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3603 structure. The size of this array is equal to the number
3604 of local rows, i.e. 'm'.
3605
3606 Output Parameter:
3607 . A - the matrix
3608
3609 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3610 MatXXXXSetPreallocation() paradigm instead of this routine directly.
3611 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3612
3613 Notes:
3614 If the *_nnz parameter is given then the *_nz parameter is ignored
3615
3616 The m,n,M,N parameters specify the size of the matrix, and its partitioning across
3617 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3618 storage requirements for this matrix.
3619
3620 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3621 processor then it must be used on all processors that share the object for
3622 that argument.
3623
3624 The user MUST specify either the local or global matrix dimensions
3625 (possibly both).
3626
3627 The parallel matrix is partitioned across processors such that the
3628 first m0 rows belong to process 0, the next m1 rows belong to
3629 process 1, the next m2 rows belong to process 2, etc., where
3630 m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
3631 values corresponding to an [m x N] submatrix.
3632
3633 The columns are logically partitioned with the n0 columns belonging
3634 to the 0th partition, the next n1 columns belonging to the next
3635 partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3636
3637 The DIAGONAL portion of the local submatrix on any given processor
3638 is the submatrix corresponding to the rows and columns m,n
3639 corresponding to the given processor, i.e. the diagonal matrix on
3640 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3641 etc. The remaining portion of the local submatrix [m x (N-n)]
3642 constitutes the OFF-DIAGONAL portion. The example below better
3643 illustrates this concept.
3644
3645 For a square global matrix we define each processor's diagonal portion
3646 to be its local rows and the corresponding columns (a square submatrix);
3647 each processor's off-diagonal portion encompasses the remainder of the
3648 local matrix (a rectangular submatrix).
3649
3650 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3651
3652 When calling this routine with a single process communicator, a matrix of
3653 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
3654 type of communicator, use the construction mechanism:
3655 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A,m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3656
3657 By default, this format uses inodes (identical nodes) when possible.
3658 We search for consecutive rows with the same nonzero structure, thereby
3659 reusing matrix information to achieve increased efficiency.
3660
3661 Options Database Keys:
3662 + -mat_no_inode - Do not use inodes
3663 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3664 - -mat_aij_oneindex - Internally use indexing starting at 1
3665 rather than 0. Note that when calling MatSetValues(),
3666 the user still MUST index entries starting at 0!
3667
3668
3669 Example usage:
3670
3671 Consider the following 8x8 matrix with 34 nonzero values, which is
3672 assembled across 3 processors.
Let's assume that proc0 owns 3 rows,
3673 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3674 as follows:
3675
3676 .vb
3677 1 2 0 | 0 3 0 | 0 4
3678 Proc0 0 5 6 | 7 0 0 | 8 0
3679 9 0 10 | 11 0 0 | 12 0
3680 -------------------------------------
3681 13 0 14 | 15 16 17 | 0 0
3682 Proc1 0 18 0 | 19 20 21 | 0 0
3683 0 0 0 | 22 23 0 | 24 0
3684 -------------------------------------
3685 Proc2 25 26 27 | 0 0 28 | 29 0
3686 30 0 0 | 31 32 33 | 0 34
3687 .ve
3688
3689 This can be represented as a collection of submatrices as:
3690
3691 .vb
3692 A B C
3693 D E F
3694 G H I
3695 .ve
3696
3697 where the submatrices A,B,C are owned by proc0, D,E,F are
3698 owned by proc1, and G,H,I are owned by proc2.
3699
3700 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3701 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3702 The 'M','N' parameters are 8,8, and have the same values on all procs.
3703
3704 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3705 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3706 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3707 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3708 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3709 matrix, and [DF] as another SeqAIJ matrix.
3710
3711 When the d_nz, o_nz parameters are specified, d_nz storage elements are
3712 allocated for every row of the local diagonal submatrix, and o_nz
3713 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3714 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3715 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3716 In this case, the values of d_nz,o_nz are:
3717 .vb
3718 proc0 : d_nz = 2, o_nz = 2
3719 proc1 : d_nz = 3, o_nz = 2
3720 proc2 : d_nz = 1, o_nz = 4
3721 .ve
3722 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3723 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3724 for proc2, i.e. we are using 12+15+10=37 storage locations to store
3725 34 values.
3726
3727 When the d_nnz, o_nnz parameters are specified, the storage is specified
3728 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3729 In the above case the values for d_nnz,o_nnz are:
3730 .vb
3731 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3732 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3733 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3734 .ve
3735 Here the space allocated is the sum of all the above values, i.e. 34, and
3736 hence the preallocation is exact.
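   As a minimal sketch of the recommended MatCreate()/MatSetType()/MatMPIAIJSetPreallocation()
   sequence, proc0 in the example above could preallocate its three rows as follows
   (comm and ierr are assumed to be declared as usual; the MatSetValues() and
   MatAssemblyBegin()/MatAssemblyEnd() calls that actually fill the matrix are omitted):

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   The calls are collective, so proc1 and proc2 make the same calls with their own
   local sizes and d_nnz/o_nnz arrays from the tables above.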
3737 3738 Level: intermediate 3739 3740 .keywords: matrix, aij, compressed row, sparse, parallel 3741 3742 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3743 MPIAIJ, MatCreateMPIAIJWithArrays() 3744 @*/ 3745 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3746 { 3747 PetscErrorCode ierr; 3748 PetscMPIInt size; 3749 3750 PetscFunctionBegin; 3751 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3752 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3753 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3754 if (size > 1) { 3755 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3756 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3757 } else { 3758 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3759 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3760 } 3761 PetscFunctionReturn(0); 3762 } 3763 3764 #undef __FUNCT__ 3765 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3766 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3767 { 3768 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3769 3770 PetscFunctionBegin; 3771 *Ad = a->A; 3772 *Ao = a->B; 3773 *colmap = a->garray; 3774 PetscFunctionReturn(0); 3775 } 3776 3777 #undef __FUNCT__ 3778 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3779 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3780 { 3781 PetscErrorCode ierr; 3782 PetscInt i; 3783 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3784 3785 PetscFunctionBegin; 3786 if (coloring->ctype == IS_COLORING_GLOBAL) { 3787 ISColoringValue *allcolors,*colors; 3788 ISColoring ocoloring; 3789 3790 /* set coloring for diagonal portion */ 3791 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3792 3793 /* set coloring for off-diagonal portion */ 3794 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3795 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3796 for (i=0; i<a->B->cmap->n; i++) { 3797 colors[i] = allcolors[a->garray[i]]; 3798 } 3799 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3800 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3801 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3802 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3803 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3804 ISColoringValue *colors; 3805 PetscInt *larray; 3806 ISColoring ocoloring; 3807 3808 /* set coloring for diagonal portion */ 3809 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3810 for (i=0; i<a->A->cmap->n; i++) { 3811 larray[i] = i + A->cmap->rstart; 3812 } 3813 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3814 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3815 for (i=0; i<a->A->cmap->n; i++) { 3816 colors[i] = coloring->colors[larray[i]]; 3817 } 3818 ierr = PetscFree(larray);CHKERRQ(ierr); 3819 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3820 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3821 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3822 3823 /* set coloring for off-diagonal portion */ 3824 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3825 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3826 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3827 for (i=0; i<a->B->cmap->n; i++) { 3828 colors[i] = coloring->colors[larray[i]]; 3829 } 3830 ierr = PetscFree(larray);CHKERRQ(ierr); 3831 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3832 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3833 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3834 } else { 3835 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3836 } 3837 3838 PetscFunctionReturn(0); 3839 } 3840 3841 #if defined(PETSC_HAVE_ADIC) 3842 #undef __FUNCT__ 3843 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3844 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3845 { 3846 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3847 PetscErrorCode ierr; 3848 3849 PetscFunctionBegin; 3850 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3851 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3852 PetscFunctionReturn(0); 3853 } 3854 #endif 3855 3856 #undef __FUNCT__ 3857 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3858 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3859 { 3860 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3861 PetscErrorCode ierr; 3862 3863 PetscFunctionBegin; 3864 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3865 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3866 PetscFunctionReturn(0); 3867 } 3868 3869 #undef __FUNCT__ 3870 #define __FUNCT__ "MatMerge" 3871 /*@ 3872 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3873 matrices from each processor 3874 3875 Collective on MPI_Comm 3876 3877 Input Parameters: 3878 + comm - the communicators the parallel matrix will live on 3879 . inmat - the input sequential matrices 3880 . n - number of local columns (or PETSC_DECIDE) 3881 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3882 3883 Output Parameter: 3884 . outmat - the parallel matrix generated 3885 3886 Level: advanced 3887 3888 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
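   A minimal usage sketch (seqpart is a hypothetical SeqAIJ matrix holding this
   process's rows; ierr is assumed declared). Note that the implementation destroys
   inmat once its values have been copied, so the caller must not destroy it again:

.vb
     Mat merged;

     ierr = MatMerge(PETSC_COMM_WORLD,seqpart,PETSC_DECIDE,MAT_INITIAL_MATRIX,&merged);CHKERRQ(ierr);
.ve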
3889 3890 @*/ 3891 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3892 { 3893 PetscErrorCode ierr; 3894 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3895 PetscInt *indx; 3896 PetscScalar *values; 3897 3898 PetscFunctionBegin; 3899 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3900 if (scall == MAT_INITIAL_MATRIX){ 3901 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3902 if (n == PETSC_DECIDE){ 3903 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3904 } 3905 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3906 rstart -= m; 3907 3908 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3909 for (i=0;i<m;i++) { 3910 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3911 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3912 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3913 } 3914 /* This routine will ONLY return MPIAIJ type matrix */ 3915 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3916 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3917 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3918 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3919 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3920 3921 } else if (scall == MAT_REUSE_MATRIX){ 3922 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3923 } else { 3924 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3925 } 3926 3927 for (i=0;i<m;i++) { 3928 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3929 Ii = i + rstart; 3930 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3931 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3932 } 3933 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3934 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3935 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3936 3937 PetscFunctionReturn(0); 3938 } 3939 3940 #undef __FUNCT__ 3941 #define __FUNCT__ "MatFileSplit" 3942 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3943 { 3944 PetscErrorCode ierr; 3945 PetscMPIInt rank; 3946 PetscInt m,N,i,rstart,nnz; 3947 size_t len; 3948 const PetscInt *indx; 3949 PetscViewer out; 3950 char *name; 3951 Mat B; 3952 const PetscScalar *values; 3953 3954 PetscFunctionBegin; 3955 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3956 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3957 /* Should this be the type of the diagonal block of A? 
*/ 3958 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3959 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3960 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3961 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3962 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3963 for (i=0;i<m;i++) { 3964 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3965 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3966 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3967 } 3968 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3969 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3970 3971 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3972 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3973 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3974 sprintf(name,"%s.%d",outfile,rank); 3975 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3976 ierr = PetscFree(name); 3977 ierr = MatView(B,out);CHKERRQ(ierr); 3978 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3979 ierr = MatDestroy(B);CHKERRQ(ierr); 3980 PetscFunctionReturn(0); 3981 } 3982 3983 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3984 #undef __FUNCT__ 3985 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3986 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3987 { 3988 PetscErrorCode ierr; 3989 Mat_Merge_SeqsToMPI *merge; 3990 PetscContainer container; 3991 3992 PetscFunctionBegin; 3993 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3994 if (container) { 3995 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3996 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3997 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3998 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3999 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4000 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4001 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4002 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4003 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4004 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4005 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4006 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4007 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4008 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4009 4010 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4011 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4012 } 4013 ierr = PetscFree(merge);CHKERRQ(ierr); 4014 4015 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4016 PetscFunctionReturn(0); 4017 } 4018 4019 #include "../src/mat/utils/freespace.h" 4020 #include "petscbt.h" 4021 4022 #undef __FUNCT__ 4023 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4024 /*@C 4025 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4026 matrices from each processor 4027 4028 Collective on MPI_Comm 4029 4030 Input Parameters: 4031 + comm - the communicators the parallel matrix will live on 4032 . seqmat - the input sequential matrices 4033 . m - number of local rows (or PETSC_DECIDE) 4034 . n - number of local columns (or PETSC_DECIDE) 4035 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4036 4037 Output Parameter: 4038 . mpimat - the parallel matrix generated 4039 4040 Level: advanced 4041 4042 Notes: 4043 The dimensions of the sequential matrix in each processor MUST be the same. 
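   The symbolic/numeric split mirrors what the MatMerge_SeqsToMPI() driver below does
   internally; a sketch of the equivalent two-step call sequence (seqmat, m, n, and mpimat
   as in that driver). The first call builds the parallel nonzero structure; the second,
   which can be repeated whenever seqmat receives new values, fills in the numerical values:

.vb
     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
     ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve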
4044 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4045 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4046 @*/ 4047 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4048 { 4049 PetscErrorCode ierr; 4050 MPI_Comm comm=((PetscObject)mpimat)->comm; 4051 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4052 PetscMPIInt size,rank,taga,*len_s; 4053 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4054 PetscInt proc,m; 4055 PetscInt **buf_ri,**buf_rj; 4056 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4057 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4058 MPI_Request *s_waits,*r_waits; 4059 MPI_Status *status; 4060 MatScalar *aa=a->a; 4061 MatScalar **abuf_r,*ba_i; 4062 Mat_Merge_SeqsToMPI *merge; 4063 PetscContainer container; 4064 4065 PetscFunctionBegin; 4066 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4067 4068 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4069 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4070 4071 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4072 if (container) { 4073 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4074 } 4075 bi = merge->bi; 4076 bj = merge->bj; 4077 buf_ri = merge->buf_ri; 4078 buf_rj = merge->buf_rj; 4079 4080 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4081 owners = merge->rowmap->range; 4082 len_s = merge->len_s; 4083 4084 /* send and recv matrix values */ 4085 /*-----------------------------*/ 4086 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4087 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4088 4089 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4090 for (proc=0,k=0; proc<size; proc++){ 4091 if (!len_s[proc]) continue; 4092 i = owners[proc]; 4093 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4094 k++; 4095 } 4096 4097 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4098 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4099 ierr = PetscFree(status);CHKERRQ(ierr); 4100 4101 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4102 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4103 4104 /* insert mat values of mpimat */ 4105 /*----------------------------*/ 4106 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4107 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4108 4109 for (k=0; k<merge->nrecv; k++){ 4110 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4111 nrows = *(buf_ri_k[k]); 4112 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4113 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4114 } 4115 4116 /* set values of ba */ 4117 m = merge->rowmap->n; 4118 for (i=0; i<m; i++) { 4119 arow = owners[rank] + i; 4120 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4121 bnzi = bi[i+1] - bi[i]; 4122 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4123 4124 /* add local non-zero vals of this proc's seqmat into ba */ 4125 anzi = ai[arow+1] - ai[arow]; 4126 aj = a->j + ai[arow]; 4127 aa = a->a + ai[arow]; 4128 nextaj = 0; 4129 for (j=0; nextaj<anzi; j++){ 4130 if (*(bj_i 
+ j) == aj[nextaj]){ /* bcol == acol */ 4131 ba_i[j] += aa[nextaj++]; 4132 } 4133 } 4134 4135 /* add received vals into ba */ 4136 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4137 /* i-th row */ 4138 if (i == *nextrow[k]) { 4139 anzi = *(nextai[k]+1) - *nextai[k]; 4140 aj = buf_rj[k] + *(nextai[k]); 4141 aa = abuf_r[k] + *(nextai[k]); 4142 nextaj = 0; 4143 for (j=0; nextaj<anzi; j++){ 4144 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4145 ba_i[j] += aa[nextaj++]; 4146 } 4147 } 4148 nextrow[k]++; nextai[k]++; 4149 } 4150 } 4151 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4152 } 4153 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4154 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4155 4156 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4157 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4158 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4159 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4160 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4161 PetscFunctionReturn(0); 4162 } 4163 4164 #undef __FUNCT__ 4165 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4166 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4167 { 4168 PetscErrorCode ierr; 4169 Mat B_mpi; 4170 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4171 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4172 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4173 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4174 PetscInt len,proc,*dnz,*onz; 4175 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4176 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4177 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4178 MPI_Status *status; 4179 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4180 PetscBT lnkbt; 4181 Mat_Merge_SeqsToMPI *merge; 4182 PetscContainer container; 4183 4184 PetscFunctionBegin; 4185 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4186 4187 /* make sure it is a PETSc comm */ 4188 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4189 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4190 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4191 4192 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4193 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4194 4195 /* determine row ownership */ 4196 /*---------------------------------------------------------*/ 4197 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4198 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4199 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4200 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4201 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4202 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4203 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4204 4205 m = merge->rowmap->n; 4206 M = merge->rowmap->N; 4207 owners = merge->rowmap->range; 4208 4209 /* determine the number of messages to send, their lengths */ 4210 /*---------------------------------------------------------*/ 4211 len_s = merge->len_s; 4212 4213 len = 0; /* length of buf_si[] */ 4214 merge->nsend = 0; 4215 for (proc=0; proc<size; proc++){ 4216 len_si[proc] = 0; 4217 if (proc == rank){ 4218 len_s[proc] = 0; 4219 } else { 4220 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4221 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4222 } 4223 if (len_s[proc]) { 4224 merge->nsend++; 4225 nrows = 0; 4226 for (i=owners[proc]; i<owners[proc+1]; i++){ 4227 if (ai[i+1] > ai[i]) nrows++; 4228 } 4229 len_si[proc] = 2*(nrows+1); 4230 len += len_si[proc]; 4231 } 4232 } 4233 4234 /* determine the number and length of messages to receive for ij-structure */ 4235 /*-------------------------------------------------------------------------*/ 4236 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4237 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4238 4239 /* post the Irecv of j-structure */ 4240 /*-------------------------------*/ 4241 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4242 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4243 4244 /* post the Isend of j-structure */ 4245 /*--------------------------------*/ 4246 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4247 4248 for (proc=0, k=0; proc<size; proc++){ 4249 if (!len_s[proc]) continue; 4250 i = owners[proc]; 4251 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4252 k++; 4253 } 4254 4255 /* receives and sends of j-structure are complete */ 4256 /*------------------------------------------------*/ 4257 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4258 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4259 4260 /* send and recv i-structure */ 4261 /*---------------------------*/ 4262 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4263 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4264 4265 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4266 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4267 for (proc=0,k=0; proc<size; proc++){ 4268 if (!len_s[proc]) continue; 4269 /* form outgoing message for i-structure: 4270 buf_si[0]: nrows to be sent 4271 [1:nrows]: row index (global) 4272 [nrows+1:2*nrows+1]: i-structure index 4273 */ 4274 /*-------------------------------------------*/ 4275 nrows = len_si[proc]/2 - 1; 4276 buf_si_i = buf_si + nrows+1; 4277 buf_si[0] = nrows; 4278 buf_si_i[0] = 0; 4279 nrows = 0; 4280 for (i=owners[proc]; i<owners[proc+1]; i++){ 4281 anzi = ai[i+1] - ai[i]; 4282 if (anzi) { 4283 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4284 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4285 nrows++; 4286 } 4287 } 4288 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4289 k++; 4290 buf_si += len_si[proc]; 4291 } 4292 4293 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4294 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4295 4296 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4297 for (i=0; i<merge->nrecv; i++){ 4298 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4299 } 4300 4301 ierr = PetscFree(len_si);CHKERRQ(ierr); 4302 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4303 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4304 ierr = 
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4305 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4306 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4307 ierr = PetscFree(status);CHKERRQ(ierr); 4308 4309 /* compute a local seq matrix in each processor */ 4310 /*----------------------------------------------*/ 4311 /* allocate bi array and free space for accumulating nonzero column info */ 4312 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4313 bi[0] = 0; 4314 4315 /* create and initialize a linked list */ 4316 nlnk = N+1; 4317 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4318 4319 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4320 len = 0; 4321 len = ai[owners[rank+1]] - ai[owners[rank]]; 4322 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4323 current_space = free_space; 4324 4325 /* determine symbolic info for each local row */ 4326 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4327 4328 for (k=0; k<merge->nrecv; k++){ 4329 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4330 nrows = *buf_ri_k[k]; 4331 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4332 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4333 } 4334 4335 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4336 len = 0; 4337 for (i=0;i<m;i++) { 4338 bnzi = 0; 4339 /* add local non-zero cols of this proc's seqmat into lnk */ 4340 arow = owners[rank] + i; 4341 anzi = ai[arow+1] - ai[arow]; 4342 aj = a->j + ai[arow]; 4343 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4344 bnzi += nlnk; 4345 /* add received col data into lnk */ 4346 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4347 if (i == *nextrow[k]) { /* i-th row */ 4348 anzi = *(nextai[k]+1) - *nextai[k]; 4349 aj = buf_rj[k] + *nextai[k]; 4350 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4351 bnzi += nlnk; 4352 nextrow[k]++; nextai[k]++; 4353 } 4354 } 4355 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4356 4357 /* if free space is not available, make more free space */ 4358 if (current_space->local_remaining<bnzi) { 4359 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4360 nspacedouble++; 4361 } 4362 /* copy data into free space, then initialize lnk */ 4363 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4364 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4365 4366 current_space->array += bnzi; 4367 current_space->local_used += bnzi; 4368 current_space->local_remaining -= bnzi; 4369 4370 bi[i+1] = bi[i] + bnzi; 4371 } 4372 4373 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4374 4375 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4376 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4377 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4378 4379 /* create symbolic parallel matrix B_mpi */ 4380 /*---------------------------------------*/ 4381 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4382 if (n==PETSC_DECIDE) { 4383 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4384 } else { 4385 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4386 } 4387 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4388 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4389 ierr = 
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4390 4391 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4392 B_mpi->assembled = PETSC_FALSE; 4393 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4394 merge->bi = bi; 4395 merge->bj = bj; 4396 merge->buf_ri = buf_ri; 4397 merge->buf_rj = buf_rj; 4398 merge->coi = PETSC_NULL; 4399 merge->coj = PETSC_NULL; 4400 merge->owners_co = PETSC_NULL; 4401 4402 /* attach the supporting struct to B_mpi for reuse */ 4403 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4404 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4405 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4406 *mpimat = B_mpi; 4407 4408 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4409 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4410 PetscFunctionReturn(0); 4411 } 4412 4413 #undef __FUNCT__ 4414 #define __FUNCT__ "MatMerge_SeqsToMPI" 4415 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4416 { 4417 PetscErrorCode ierr; 4418 4419 PetscFunctionBegin; 4420 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4421 if (scall == MAT_INITIAL_MATRIX){ 4422 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4423 } 4424 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4425 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4426 PetscFunctionReturn(0); 4427 } 4428 4429 #undef __FUNCT__ 4430 #define __FUNCT__ "MatGetLocalMat" 4431 /*@ 4432 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4433 4434 Not Collective 4435 4436 Input Parameters: 4437 + A - the matrix 4438 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4439 4440 Output Parameter: 4441 . 
A_loc - the local sequential matrix generated 4442 4443 Level: developer 4444 4445 @*/ 4446 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4447 { 4448 PetscErrorCode ierr; 4449 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4450 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4451 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4452 MatScalar *aa=a->a,*ba=b->a,*cam; 4453 PetscScalar *ca; 4454 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4455 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4456 4457 PetscFunctionBegin; 4458 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4459 if (scall == MAT_INITIAL_MATRIX){ 4460 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4461 ci[0] = 0; 4462 for (i=0; i<am; i++){ 4463 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4464 } 4465 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4466 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4467 k = 0; 4468 for (i=0; i<am; i++) { 4469 ncols_o = bi[i+1] - bi[i]; 4470 ncols_d = ai[i+1] - ai[i]; 4471 /* off-diagonal portion of A */ 4472 for (jo=0; jo<ncols_o; jo++) { 4473 col = cmap[*bj]; 4474 if (col >= cstart) break; 4475 cj[k] = col; bj++; 4476 ca[k++] = *ba++; 4477 } 4478 /* diagonal portion of A */ 4479 for (j=0; j<ncols_d; j++) { 4480 cj[k] = cstart + *aj++; 4481 ca[k++] = *aa++; 4482 } 4483 /* off-diagonal portion of A */ 4484 for (j=jo; j<ncols_o; j++) { 4485 cj[k] = cmap[*bj++]; 4486 ca[k++] = *ba++; 4487 } 4488 } 4489 /* put together the new matrix */ 4490 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4491 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4492 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4493 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4494 mat->free_a = PETSC_TRUE; 4495 mat->free_ij = PETSC_TRUE; 4496 mat->nonew = 0; 4497 } else if (scall == MAT_REUSE_MATRIX){ 4498 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4499 ci = mat->i; cj = mat->j; cam = mat->a; 4500 for (i=0; i<am; i++) { 4501 /* off-diagonal portion of A */ 4502 ncols_o = bi[i+1] - bi[i]; 4503 for (jo=0; jo<ncols_o; jo++) { 4504 col = cmap[*bj]; 4505 if (col >= cstart) break; 4506 *cam++ = *ba++; bj++; 4507 } 4508 /* diagonal portion of A */ 4509 ncols_d = ai[i+1] - ai[i]; 4510 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4511 /* off-diagonal portion of A */ 4512 for (j=jo; j<ncols_o; j++) { 4513 *cam++ = *ba++; bj++; 4514 } 4515 } 4516 } else { 4517 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4518 } 4519 4520 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4521 PetscFunctionReturn(0); 4522 } 4523 4524 #undef __FUNCT__ 4525 #define __FUNCT__ "MatGetLocalMatCondensed" 4526 /*@C 4527 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4528 4529 Not Collective 4530 4531 Input Parameters: 4532 + A - the matrix 4533 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4534 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4535 4536 Output Parameter: 4537 . 
A_loc - the local sequential matrix generated 4538 4539 Level: developer 4540 4541 @*/ 4542 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4543 { 4544 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4545 PetscErrorCode ierr; 4546 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4547 IS isrowa,iscola; 4548 Mat *aloc; 4549 4550 PetscFunctionBegin; 4551 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4552 if (!row){ 4553 start = A->rmap->rstart; end = A->rmap->rend; 4554 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4555 } else { 4556 isrowa = *row; 4557 } 4558 if (!col){ 4559 start = A->cmap->rstart; 4560 cmap = a->garray; 4561 nzA = a->A->cmap->n; 4562 nzB = a->B->cmap->n; 4563 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4564 ncols = 0; 4565 for (i=0; i<nzB; i++) { 4566 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4567 else break; 4568 } 4569 imark = i; 4570 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4571 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4572 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4573 ierr = PetscFree(idx);CHKERRQ(ierr); 4574 } else { 4575 iscola = *col; 4576 } 4577 if (scall != MAT_INITIAL_MATRIX){ 4578 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4579 aloc[0] = *A_loc; 4580 } 4581 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4582 *A_loc = aloc[0]; 4583 ierr = PetscFree(aloc);CHKERRQ(ierr); 4584 if (!row){ 4585 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4586 } 4587 if (!col){ 4588 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4589 } 4590 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4591 PetscFunctionReturn(0); 4592 } 4593 4594 #undef __FUNCT__ 4595 #define __FUNCT__ "MatGetBrowsOfAcols" 4596 /*@C 4597 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 4598 4599 Collective on Mat 4600 4601 Input Parameters: 4602 + A,B - the matrices in mpiaij format 4603 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4604 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4605 4606 Output Parameters: 4607 + rowb, colb - index sets of rows and columns of B to extract 4608 . 
brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's local rows 4609 - B_seq - the sequential matrix generated 4610 4611 Level: developer 4612 4613 @*/ 4614 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4615 { 4616 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4617 PetscErrorCode ierr; 4618 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4619 IS isrowb,iscolb; 4620 Mat *bseq; 4621 4622 PetscFunctionBegin; 4623 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4624 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4625 } 4626 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4627 4628 if (scall == MAT_INITIAL_MATRIX){ 4629 start = A->cmap->rstart; 4630 cmap = a->garray; 4631 nzA = a->A->cmap->n; 4632 nzB = a->B->cmap->n; 4633 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4634 ncols = 0; 4635 for (i=0; i<nzB; i++) { /* row < local row index */ 4636 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4637 else break; 4638 } 4639 imark = i; 4640 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4641 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4642 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4643 ierr = PetscFree(idx);CHKERRQ(ierr); 4644 *brstart = imark; 4645 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4646 } else { 4647 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4648 isrowb = *rowb; iscolb = *colb; 4649 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4650 bseq[0] = *B_seq; 4651 } 4652 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4653 *B_seq = bseq[0]; 4654 ierr = PetscFree(bseq);CHKERRQ(ierr); 4655 if (!rowb){ 4656 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4657 } else { 4658 *rowb = isrowb; 4659 } 4660 if (!colb){ 4661 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4662 } else { 4663 *colb = iscolb; 4664 } 4665 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4666 PetscFunctionReturn(0); 4667 } 4668 4669 #undef __FUNCT__ 4670 #define __FUNCT__ "MatGetBrowsOfAoCols" 4671 /*@C 4672 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 4673 of the OFF-DIAGONAL portion of local A 4674 4675 Collective on Mat 4676 4677 Input Parameters: 4678 + A,B - the matrices in mpiaij format 4679 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4680 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4681 . 
startsj_r - similar to startsj for receives 4682 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4683 4684 Output Parameter: 4685 + B_oth - the sequential matrix generated 4686 4687 Level: developer 4688 4689 @*/ 4690 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4691 { 4692 VecScatter_MPI_General *gen_to,*gen_from; 4693 PetscErrorCode ierr; 4694 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4695 Mat_SeqAIJ *b_oth; 4696 VecScatter ctx=a->Mvctx; 4697 MPI_Comm comm=((PetscObject)ctx)->comm; 4698 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4699 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4700 PetscScalar *rvalues,*svalues; 4701 MatScalar *b_otha,*bufa,*bufA; 4702 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4703 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4704 MPI_Status *sstatus,rstatus; 4705 PetscMPIInt jj; 4706 PetscInt *cols,sbs,rbs; 4707 PetscScalar *vals; 4708 4709 PetscFunctionBegin; 4710 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4711 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4712 } 4713 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4714 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4715 4716 gen_to = (VecScatter_MPI_General*)ctx->todata; 4717 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4718 rvalues = gen_from->values; /* holds the length of receiving row */ 4719 svalues = gen_to->values; /* holds the length of sending row */ 4720 nrecvs = gen_from->n; 4721 nsends = gen_to->n; 4722 4723 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4724 srow = gen_to->indices; /* local row index to be sent */ 4725 sstarts = gen_to->starts; 4726 sprocs = gen_to->procs; 4727 sstatus = gen_to->sstatus; 4728 sbs = gen_to->bs; 4729 rstarts = gen_from->starts; 4730 rprocs = gen_from->procs; 4731 rbs = gen_from->bs; 4732 4733 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4734 if (scall == MAT_INITIAL_MATRIX){ 4735 /* i-array */ 4736 /*---------*/ 4737 /* post receives */ 4738 for (i=0; i<nrecvs; i++){ 4739 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4740 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4741 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4742 } 4743 4744 /* pack the outgoing message */ 4745 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4746 sstartsj[0] = 0; rstartsj[0] = 0; 4747 len = 0; /* total length of j or a array to be sent */ 4748 k = 0; 4749 for (i=0; i<nsends; i++){ 4750 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4751 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4752 for (j=0; j<nrows; j++) { 4753 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4754 for (l=0; l<sbs; l++){ 4755 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4756 rowlen[j*sbs+l] = ncols; 4757 len += ncols; 4758 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4759 } 4760 k++; 4761 } 4762 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4763 sstartsj[i+1] = len; /* starting point of (i+1)-th 
outgoing msg in bufj and bufa */ 4764 } 4765 /* recvs and sends of i-array are completed */ 4766 i = nrecvs; 4767 while (i--) { 4768 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4769 } 4770 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4771 4772 /* allocate buffers for sending j and a arrays */ 4773 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4774 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4775 4776 /* create i-array of B_oth */ 4777 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4778 b_othi[0] = 0; 4779 len = 0; /* total length of j or a array to be received */ 4780 k = 0; 4781 for (i=0; i<nrecvs; i++){ 4782 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4783 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4784 for (j=0; j<nrows; j++) { 4785 b_othi[k+1] = b_othi[k] + rowlen[j]; 4786 len += rowlen[j]; k++; 4787 } 4788 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4789 } 4790 4791 /* allocate space for j and a arrays of B_oth */ 4792 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4793 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4794 4795 /* j-array */ 4796 /*---------*/ 4797 /* post receives of j-array */ 4798 for (i=0; i<nrecvs; i++){ 4799 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4800 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4801 } 4802 4803 /* pack the outgoing message j-array */ 4804 k = 0; 4805 for (i=0; i<nsends; i++){ 4806 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4807 bufJ = bufj+sstartsj[i]; 4808 for (j=0; j<nrows; j++) { 4809 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4810 for (ll=0; ll<sbs; ll++){ 4811 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4812 for (l=0; l<ncols; l++){ 4813 *bufJ++ = cols[l]; 4814 } 4815 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4816 } 4817 } 4818 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4819 } 4820 4821 /* recvs and sends of j-array are completed */ 4822 i = nrecvs; 4823 while (i--) { 4824 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4825 } 4826 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4827 } else if (scall == MAT_REUSE_MATRIX){ 4828 sstartsj = *startsj; 4829 rstartsj = *startsj_r; 4830 bufa = *bufa_ptr; 4831 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4832 b_otha = b_oth->a; 4833 } else { 4834 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse value: only MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX are supported"); 4835 } 4836 4837 /* a-array */ 4838 /*---------*/ 4839 /* post receives of a-array */ 4840 for (i=0; i<nrecvs; i++){ 4841 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4842 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4843 } 4844 4845 /* pack the outgoing message a-array */ 4846 k = 0; 4847 for (i=0; i<nsends; i++){ 4848 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4849 bufA = bufa+sstartsj[i]; 4850 for (j=0; j<nrows; j++) { 4851 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4852 for (ll=0; ll<sbs; ll++){ 4853 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4854 for (l=0; l<ncols; l++){ 4855 *bufA++ = vals[l]; 4856 } 4857 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4858 } 4859 } 4860 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4861 } 4862 /* recvs and sends of a-array are completed */ 4863 i = nrecvs; 4864 while (i--) { 4865 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4866 } 4867 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4868 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4869 4870 if (scall == MAT_INITIAL_MATRIX){ 4871 /* put together the new matrix */ 4872 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4873 4874 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4875 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4876 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4877 b_oth->free_a = PETSC_TRUE; 4878 b_oth->free_ij = PETSC_TRUE; 4879 b_oth->nonew = 0; 4880 4881 ierr = PetscFree(bufj);CHKERRQ(ierr); 4882 if (!startsj || !bufa_ptr){ 4883 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4884 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4885 } else { 4886 *startsj = sstartsj; 4887 *startsj_r = rstartsj; 4888 *bufa_ptr = bufa; 4889 } 4890 } 4891 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4892 PetscFunctionReturn(0); 4893 } 4894 4895 #undef __FUNCT__ 4896 #define __FUNCT__ "MatGetCommunicationStructs" 4897 /*@C 4898 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4899 4900 Not Collective 4901 4902 Input Parameters: 4903 . A - The matrix in mpiaij format 4904 4905 Output Parameter: 4906 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4907 . 
colmap - A map from global column index to local index into lvec 4908 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4909 4910 Level: developer 4911 4912 @*/ 4913 #if defined (PETSC_USE_CTABLE) 4914 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4915 #else 4916 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4917 #endif 4918 { 4919 Mat_MPIAIJ *a; 4920 4921 PetscFunctionBegin; 4922 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4923 PetscValidPointer(lvec, 2); 4924 PetscValidPointer(colmap, 3); 4925 PetscValidPointer(multScatter, 4); 4926 a = (Mat_MPIAIJ *) A->data; 4927 if (lvec) *lvec = a->lvec; 4928 if (colmap) *colmap = a->colmap; 4929 if (multScatter) *multScatter = a->Mvctx; 4930 PetscFunctionReturn(0); 4931 } 4932 4933 EXTERN_C_BEGIN 4934 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4935 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4936 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 4937 EXTERN_C_END 4938 4939 #undef __FUNCT__ 4940 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4941 /* 4942 Computes C = (B'*A')' since computing A*B directly is untenable 4943 4944 n p p 4945 ( ) ( ) ( ) 4946 m ( A ) * n ( B ) = m ( C ) 4947 ( ) ( ) ( ) 4948 4949 */ 4950 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4951 { 4952 PetscErrorCode ierr; 4953 Mat At,Bt,Ct; 4954 4955 PetscFunctionBegin; 4956 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4957 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4958 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4959 ierr = MatDestroy(At);CHKERRQ(ierr); 4960 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4961 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4962 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4963 PetscFunctionReturn(0); 4964 } 4965 4966 #undef __FUNCT__ 4967 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4968 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4969 { 4970 PetscErrorCode ierr; 4971 PetscInt m=A->rmap->n,n=B->cmap->n; 4972 Mat Cmat; 4973 4974 PetscFunctionBegin; 4975 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4976 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4977 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4978 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4979 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4980 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4981 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4982 *C = Cmat; 4983 PetscFunctionReturn(0); 4984 } 4985 4986 /* ----------------------------------------------------------------*/ 4987 #undef __FUNCT__ 4988 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4989 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4990 { 4991 PetscErrorCode ierr; 4992 4993 PetscFunctionBegin; 4994 if (scall == MAT_INITIAL_MATRIX){ 4995 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4996 } 4997 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4998 PetscFunctionReturn(0); 4999 } 5000 
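/*
   Editorial usage sketch (not part of the library source): how a caller reaches the
   MatMatMult_MPIDense_MPIAIJ path above through the public interface.  Here A is assumed to be
   an assembled MPIDENSE matrix, B an assembled MPIAIJ matrix with A->cmap->n == B->rmap->n,
   and ierr a PetscErrorCode; PETSC_DEFAULT lets PETSc choose the fill estimate.

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ... change numerical values in A or B ...
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/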
5001 EXTERN_C_BEGIN 5002 #if defined(PETSC_HAVE_MUMPS) 5003 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5004 #endif 5005 #if defined(PETSC_HAVE_PASTIX) 5006 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5007 #endif 5008 #if defined(PETSC_HAVE_SUPERLU_DIST) 5009 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5010 #endif 5011 #if defined(PETSC_HAVE_SPOOLES) 5012 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5013 #endif 5014 EXTERN_C_END 5015 5016 /*MC 5017 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5018 5019 Options Database Keys: 5020 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5021 5022 Level: beginner 5023 5024 .seealso: MatCreateMPIAIJ() 5025 M*/ 5026 5027 EXTERN_C_BEGIN 5028 #undef __FUNCT__ 5029 #define __FUNCT__ "MatCreate_MPIAIJ" 5030 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5031 { 5032 Mat_MPIAIJ *b; 5033 PetscErrorCode ierr; 5034 PetscMPIInt size; 5035 5036 PetscFunctionBegin; 5037 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5038 5039 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5040 B->data = (void*)b; 5041 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5042 B->rmap->bs = 1; 5043 B->assembled = PETSC_FALSE; 5044 B->mapping = 0; 5045 5046 B->insertmode = NOT_SET_VALUES; 5047 b->size = size; 5048 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5049 5050 /* build cache for off array entries formed */ 5051 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5052 b->donotstash = PETSC_FALSE; 5053 b->colmap = 0; 5054 b->garray = 0; 5055 b->roworiented = PETSC_TRUE; 5056 5057 /* stuff used for matrix vector multiply */ 5058 b->lvec = PETSC_NULL; 5059 b->Mvctx = PETSC_NULL; 5060 5061 /* stuff for MatGetRow() */ 5062 b->rowindices = 0; 5063 b->rowvalues = 0; 5064 b->getrowactive = PETSC_FALSE; 5065 5066 #if defined(PETSC_HAVE_SPOOLES) 5067 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5068 "MatGetFactor_mpiaij_spooles", 5069 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5070 #endif 5071 #if defined(PETSC_HAVE_MUMPS) 5072 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5073 "MatGetFactor_aij_mumps", 5074 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5075 #endif 5076 #if defined(PETSC_HAVE_PASTIX) 5077 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5078 "MatGetFactor_mpiaij_pastix", 5079 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5080 #endif 5081 #if defined(PETSC_HAVE_SUPERLU_DIST) 5082 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5083 "MatGetFactor_mpiaij_superlu_dist", 5084 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5085 #endif 5086 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5087 "MatStoreValues_MPIAIJ", 5088 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5089 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5090 "MatRetrieveValues_MPIAIJ", 5091 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5092 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5093 "MatGetDiagonalBlock_MPIAIJ", 5094 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5095 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5096 "MatIsTranspose_MPIAIJ", 5097 
MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5098 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5099 "MatMPIAIJSetPreallocation_MPIAIJ", 5100 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5101 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5102 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5103 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5104 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5105 "MatDiagonalScaleLocal_MPIAIJ", 5106 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5107 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5108 "MatConvert_MPIAIJ_MPICSRPERM", 5109 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5110 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5111 "MatConvert_MPIAIJ_MPICRL", 5112 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5113 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5114 "MatConvert_MPIAIJ_MPISBAIJ", 5115 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5116 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5117 "MatMatMult_MPIDense_MPIAIJ", 5118 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5119 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5120 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5121 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5122 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5123 "MatMatMultNumeric_MPIDense_MPIAIJ", 5124 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5125 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5126 PetscFunctionReturn(0); 5127 } 5128 EXTERN_C_END 5129 5130 #undef __FUNCT__ 5131 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5132 /*@ 5133 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" 5134 and "off-diagonal" part of the matrix in CSR format. 5135 5136 Collective on MPI_Comm 5137 5138 Input Parameters: 5139 + comm - MPI communicator 5140 . m - number of local rows (Cannot be PETSC_DECIDE) 5141 . n - This value should be the same as the local size used in creating the 5142 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 5143 calculated if N is given). For square matrices n is almost always m. 5144 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5145 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5146 . i - row indices for "diagonal" portion of matrix 5147 . j - column indices 5148 . a - matrix values 5149 . oi - row indices for "off-diagonal" portion of matrix 5150 . oj - column indices 5151 - oa - matrix values 5152 5153 Output Parameter: 5154 . mat - the matrix 5155 5156 Level: advanced 5157 5158 Notes: 5159 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5160 5161 The i and j indices are zero-based. 5162 5163 See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix 5164 5165 This sets local rows and cannot be used to set off-processor values. 5166 5167 You cannot later use MatSetValues() to change values in this matrix.
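
   Example (an illustrative sketch, not taken from the PETSc examples; it assumes a run on a
   single process with a 2x2 matrix, so the "off-diagonal" block is empty and oj/oa are never read):

      PetscInt    i[]  = {0,1,2},   j[]  = {0,1};
      PetscScalar a[]  = {2.0,3.0};
      PetscInt    oi[] = {0,0,0},   oj[] = {0};
      PetscScalar oa[] = {0.0};
      Mat         mat;

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);

   Because the arrays are not copied, they must remain valid for the entire life of mat.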
5168 5169 .keywords: matrix, aij, compressed row, sparse, parallel 5170 5171 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5172 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5173 @*/ 5174 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5175 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5176 { 5177 PetscErrorCode ierr; 5178 Mat_MPIAIJ *maij; 5179 5180 PetscFunctionBegin; 5181 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative"); 5182 if (i[0]) { 5183 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5184 } 5185 if (oi[0]) { 5186 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5187 } 5188 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5189 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5190 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5191 maij = (Mat_MPIAIJ*) (*mat)->data; 5192 maij->donotstash = PETSC_TRUE; 5193 (*mat)->preallocated = PETSC_TRUE; 5194 5195 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5196 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5197 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5198 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5199 5200 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5201 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5202 5203 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5204 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5205 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5206 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5207 5208 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5209 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5210 PetscFunctionReturn(0); 5211 } 5212 5213 /* 5214 Special version for direct calls from Fortran 5215 */ 5216 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5217 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5218 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5219 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5220 #endif 5221 5222 /* Change these macros so they can be used in a void function */ 5223 #undef CHKERRQ 5224 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5225 #undef SETERRQ2 5226 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5227 #undef SETERRQ 5228 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5229 5230 EXTERN_C_BEGIN 5231 #undef __FUNCT__ 5232 #define __FUNCT__ "matsetvaluesmpiaij_" 5233 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5234 { 5235 Mat mat = *mmat; 5236 PetscInt m = *mm, n = *mn; 5237 InsertMode addv = *maddv; 5238 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5239 PetscScalar value; 5240 PetscErrorCode ierr; 5241 5242 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5243 if (mat->insertmode == NOT_SET_VALUES) { 5244 mat->insertmode = addv; 5245 } 5246 #if defined(PETSC_USE_DEBUG) 5247 else if (mat->insertmode != addv) { 5248 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5249 } 5250 #endif 5251 { 5252 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5253 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5254 PetscTruth roworiented = aij->roworiented; 5255 5256 /* Some Variables required in the macro */ 5257 Mat A = aij->A; 5258 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5259 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5260 MatScalar *aa = a->a; 5261 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5262 Mat B = aij->B; 5263 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5264 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5265 MatScalar *ba = b->a; 5266 5267 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5268 PetscInt nonew = a->nonew; 5269 MatScalar *ap1,*ap2; 5270 5271 PetscFunctionBegin; 5272 for (i=0; i<m; i++) { 5273 if (im[i] < 0) continue; 5274 #if defined(PETSC_USE_DEBUG) 5275 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5276 #endif 5277 if (im[i] >= rstart && im[i] < rend) { 5278 row = im[i] - rstart; 5279 lastcol1 = -1; 5280 rp1 = aj + ai[row]; 5281 ap1 = aa + ai[row]; 5282 rmax1 = aimax[row]; 5283 nrow1 = ailen[row]; 5284 low1 = 0; 5285 high1 = nrow1; 5286 lastcol2 = -1; 5287 rp2 = bj + bi[row]; 5288 ap2 = ba + bi[row]; 5289 rmax2 = bimax[row]; 5290 nrow2 = bilen[row]; 5291 low2 = 0; 5292 high2 = nrow2; 5293 5294 for (j=0; j<n; j++) { 5295 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5296 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5297 if (in[j] >= cstart && in[j] < cend){ 5298 col = in[j] - cstart; 5299 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5300 } else if (in[j] < 0) continue; 5301 #if defined(PETSC_USE_DEBUG) 5302 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5303 #endif 5304 else { 5305 if (mat->was_assembled) { 5306 if (!aij->colmap) { 5307 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5308 } 5309 #if defined (PETSC_USE_CTABLE) 5310 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5311 col--; 5312 #else 5313 col = aij->colmap[in[j]] - 1; 5314 #endif 5315 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5316 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5317 col = in[j]; 5318 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5319 B = aij->B; 5320 b = (Mat_SeqAIJ*)B->data; 5321 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5322 rp2 = bj + bi[row]; 5323 ap2 = ba + bi[row]; 5324 rmax2 = bimax[row]; 5325 nrow2 = bilen[row]; 5326 low2 = 0; 5327 high2 = nrow2; 5328 bm = aij->B->rmap->n; 5329 ba = b->a; 5330 } 5331 } else col = in[j]; 5332 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5333 } 5334 } 5335 } else { 5336 if (!aij->donotstash) { 5337 if (roworiented) { 5338 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5339 } else { 5340 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5341 } 5342 } 5343 } 5344 }} 5345 PetscFunctionReturnVoid(); 5346 } 5347 EXTERN_C_END 5348 5349
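
/*
   Editorial usage sketch (not part of the library): the standard way an application creates and
   assembles a MATMPIAIJ matrix, exercising MatCreate_MPIAIJ(), MatMPIAIJSetPreallocation() and the
   MatSetValues() path implemented in this file.  The 1D Laplacian stencil and the size n = 100 are
   made up for the illustration; ierr is a PetscErrorCode.

     Mat         A;
     PetscInt    n = 100,rstart,rend,row,ncols,cols[3];
     PetscScalar vals[3];

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ncols = 0;
       if (row > 0)   {cols[ncols] = row-1; vals[ncols++] = -1.0;}
       cols[ncols] = row; vals[ncols++] = 2.0;
       if (row < n-1) {cols[ncols] = row+1; vals[ncols++] = -1.0;}
       ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/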