#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

  Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
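
/*
   Illustrative sketch (not part of the original source): how MatDistribute_MPIAIJ() might be
   driven, assuming a SeqAIJ matrix "gseq" has been built on rank 0 and "mlocal" is the number
   of rows this process should own.  The names gseq, mlocal and mdist are hypothetical; the
   reuse call assumes the distributed matrix was created earlier with MAT_INITIAL_MATRIX and
   that its nonzero pattern is unchanged.

     Mat gseq  = PETSC_NULL;   /* significant only on rank 0 */
     Mat mdist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&mdist);CHKERRQ(ierr);
     ... later, after only the numerical values of gseq have changed on rank 0 ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&mdist);CHKERRQ(ierr);
*/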

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
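
/*
   Illustrative sketch (not part of the original source) of the search-and-insert strategy the
   MatSetValues_SeqAIJ_A_Private()/_B_Private() macros above implement: bisect the sorted column
   list of a row until the window is small, then scan linearly, and either update the matching
   entry or shift the tail up and insert.  The helper below is hypothetical and assumes the row
   already has spare capacity (the macros instead call MatSeqXAIJReallocateAIJ() when it does not).

     static void RowInsertSketch(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,PetscInt col,PetscScalar value,InsertMode addv)
     {
       PetscInt low = 0,high = *nrow,t,i;
       while (high-low > 5) {                   binary narrowing, as in the macros
         t = (low+high)/2;
         if (cols[t] > col) high = t; else low = t;
       }
       for (i=low; i<high; i++) {               short linear scan
         if (cols[i] > col) break;
         if (cols[i] == col) {
           if (addv == ADD_VALUES) vals[i] += value; else vals[i] = value;
           return;
         }
       }
       for (t=*nrow; t>i; t--) { cols[t] = cols[t-1]; vals[t] = vals[t-1]; }   shift tail up
       cols[i] = col; vals[i] = value; (*nrow)++;
     }
*/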

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscTruth ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry, allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
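
/*
   Note added for exposition (not part of the original source): MatMult_MPIAIJ() and
   MatMultAdd_MPIAIJ() above realize the usual MPIAIJ splitting of the parallel product.
   With A_d the local "diagonal" block (a->A), A_o the local "off-diagonal" block (a->B),
   x_loc the locally owned part of x and x_ghost = a->lvec the scattered ghost values,

       y_loc = A_d * x_loc + A_o * x_ghost

   The VecScatterBegin()/VecScatterEnd() pair brackets the A_d multiply so that the
   communication filling x_ghost can overlap with the purely local work.
*/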

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
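
/*
   Note added for exposition (not part of the original source): the file layout that
   MatView_MPIAIJ_Binary() above writes, read off directly from its PetscBinaryWrite() calls, is

     header[4]        = { MAT_FILE_CLASSID, global rows M, global cols N, global nonzero count }
     row lengths      M PetscInt values, written rank by rank in ownership order
     column indices   the nonzeros of each rank, per row with diagonal and off-diagonal parts merged
     values           the matching PetscScalar entries in the same order

   Only rank 0 touches the file descriptor; the other ranks ship their pieces to it with MPI_Send().
*/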

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject)mat)->prefix,"-mat_ascii_output_large",&flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscTruth     hasop;

  PetscFunctionBegin;
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
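
/*
   Note added for exposition (not part of the original source): each "local sweep" above is a
   block-Jacobi step across processes with SOR inside the local diagonal block.  Writing A_d for
   mat->A, A_o for mat->B and x_ghost for the scattered off-process values, the while-loops first
   form

       bb1 = bb - A_o * x_ghost        (the VecScale(-1.0) followed by the multadd)

   and then relax A_d * x = bb1 with the requested forward, backward, or symmetric local sweep.
*/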

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}
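
/*
   Illustrative sketch (not part of the original source): how application code typically queries
   the information MatGetInfo_MPIAIJ() assembles, here reduced with the global sum over all ranks.

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g, allocated %g, mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/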
switch (op) { 1420 case MAT_NEW_NONZERO_LOCATIONS: 1421 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1422 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1423 case MAT_KEEP_NONZERO_PATTERN: 1424 case MAT_NEW_NONZERO_LOCATION_ERR: 1425 case MAT_USE_INODES: 1426 case MAT_IGNORE_ZERO_ENTRIES: 1427 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1428 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1429 break; 1430 case MAT_ROW_ORIENTED: 1431 a->roworiented = flg; 1432 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1433 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1434 break; 1435 case MAT_NEW_DIAGONALS: 1436 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1437 break; 1438 case MAT_IGNORE_OFF_PROC_ENTRIES: 1439 a->donotstash = PETSC_TRUE; 1440 break; 1441 case MAT_SYMMETRIC: 1442 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1443 break; 1444 case MAT_STRUCTURALLY_SYMMETRIC: 1445 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1446 break; 1447 case MAT_HERMITIAN: 1448 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1449 break; 1450 case MAT_SYMMETRY_ETERNAL: 1451 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1452 break; 1453 default: 1454 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatGetRow_MPIAIJ" 1461 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1462 { 1463 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1464 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1465 PetscErrorCode ierr; 1466 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1467 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1468 PetscInt *cmap,*idx_p; 1469 1470 PetscFunctionBegin; 1471 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1472 mat->getrowactive = PETSC_TRUE; 1473 1474 if (!mat->rowvalues && (idx || v)) { 1475 /* 1476 allocate enough space to hold information from the longest row. 
1477 */ 1478 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1479 PetscInt max = 1,tmp; 1480 for (i=0; i<matin->rmap->n; i++) { 1481 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1482 if (max < tmp) { max = tmp; } 1483 } 1484 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1485 } 1486 1487 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1488 lrow = row - rstart; 1489 1490 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1491 if (!v) {pvA = 0; pvB = 0;} 1492 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1493 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1494 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1495 nztot = nzA + nzB; 1496 1497 cmap = mat->garray; 1498 if (v || idx) { 1499 if (nztot) { 1500 /* Sort by increasing column numbers, assuming A and B already sorted */ 1501 PetscInt imark = -1; 1502 if (v) { 1503 *v = v_p = mat->rowvalues; 1504 for (i=0; i<nzB; i++) { 1505 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1506 else break; 1507 } 1508 imark = i; 1509 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1510 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1511 } 1512 if (idx) { 1513 *idx = idx_p = mat->rowindices; 1514 if (imark > -1) { 1515 for (i=0; i<imark; i++) { 1516 idx_p[i] = cmap[cworkB[i]]; 1517 } 1518 } else { 1519 for (i=0; i<nzB; i++) { 1520 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1521 else break; 1522 } 1523 imark = i; 1524 } 1525 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1526 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1527 } 1528 } else { 1529 if (idx) *idx = 0; 1530 if (v) *v = 0; 1531 } 1532 } 1533 *nz = nztot; 1534 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1535 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1541 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1542 { 1543 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1544 1545 PetscFunctionBegin; 1546 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1547 aij->getrowactive = PETSC_FALSE; 1548 PetscFunctionReturn(0); 1549 } 1550 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "MatNorm_MPIAIJ" 1553 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1554 { 1555 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1556 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1557 PetscErrorCode ierr; 1558 PetscInt i,j,cstart = mat->cmap->rstart; 1559 PetscReal sum = 0.0; 1560 MatScalar *v; 1561 1562 PetscFunctionBegin; 1563 if (aij->size == 1) { 1564 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1565 } else { 1566 if (type == NORM_FROBENIUS) { 1567 v = amat->a; 1568 for (i=0; i<amat->nz; i++) { 1569 #if defined(PETSC_USE_COMPLEX) 1570 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1571 #else 1572 sum += (*v)*(*v); v++; 1573 #endif 1574 } 1575 v = bmat->a; 1576 for (i=0; i<bmat->nz; i++) { 1577 #if defined(PETSC_USE_COMPLEX) 1578 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1579 #else 1580 sum += (*v)*(*v); v++; 1581 #endif 1582 } 1583 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1584 *norm = sqrt(*norm); 1585 } else if (type 
== NORM_1) { /* max column norm */ 1586 PetscReal *tmp,*tmp2; 1587 PetscInt *jj,*garray = aij->garray; 1588 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1589 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1590 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1591 *norm = 0.0; 1592 v = amat->a; jj = amat->j; 1593 for (j=0; j<amat->nz; j++) { 1594 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1595 } 1596 v = bmat->a; jj = bmat->j; 1597 for (j=0; j<bmat->nz; j++) { 1598 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1599 } 1600 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1601 for (j=0; j<mat->cmap->N; j++) { 1602 if (tmp2[j] > *norm) *norm = tmp2[j]; 1603 } 1604 ierr = PetscFree(tmp);CHKERRQ(ierr); 1605 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1606 } else if (type == NORM_INFINITY) { /* max row norm */ 1607 PetscReal ntemp = 0.0; 1608 for (j=0; j<aij->A->rmap->n; j++) { 1609 v = amat->a + amat->i[j]; 1610 sum = 0.0; 1611 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1612 sum += PetscAbsScalar(*v); v++; 1613 } 1614 v = bmat->a + bmat->i[j]; 1615 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1616 sum += PetscAbsScalar(*v); v++; 1617 } 1618 if (sum > ntemp) ntemp = sum; 1619 } 1620 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1621 } else { 1622 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1623 } 1624 } 1625 PetscFunctionReturn(0); 1626 } 1627 1628 #undef __FUNCT__ 1629 #define __FUNCT__ "MatTranspose_MPIAIJ" 1630 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1631 { 1632 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1633 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1634 PetscErrorCode ierr; 1635 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1636 PetscInt cstart=A->cmap->rstart,ncol; 1637 Mat B; 1638 MatScalar *array; 1639 1640 PetscFunctionBegin; 1641 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1642 1643 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1644 ai = Aloc->i; aj = Aloc->j; 1645 bi = Bloc->i; bj = Bloc->j; 1646 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1647 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1648 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1649 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1650 for (i=0; i<ai[ma]; i++){ 1651 d_nnz[aj[i]] ++; 1652 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1653 } 1654 1655 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1656 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1657 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1658 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1659 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1660 } else { 1661 B = *matout; 1662 } 1663 1664 /* copy over the A part */ 1665 array = Aloc->a; 1666 row = A->rmap->rstart; 1667 for (i=0; i<ma; i++) { 1668 ncol = ai[i+1]-ai[i]; 1669 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1670 row++; array += ncol; aj += ncol; 1671 } 1672 aj = Aloc->j; 1673 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1674 1675 /* copy over the B part */ 1676 ierr = 
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1677 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1678 array = Bloc->a; 1679 row = A->rmap->rstart; 1680 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1681 cols_tmp = cols; 1682 for (i=0; i<mb; i++) { 1683 ncol = bi[i+1]-bi[i]; 1684 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1685 row++; array += ncol; cols_tmp += ncol; 1686 } 1687 ierr = PetscFree(cols);CHKERRQ(ierr); 1688 1689 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1690 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1691 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1692 *matout = B; 1693 } else { 1694 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1695 } 1696 PetscFunctionReturn(0); 1697 } 1698 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1701 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1702 { 1703 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1704 Mat a = aij->A,b = aij->B; 1705 PetscErrorCode ierr; 1706 PetscInt s1,s2,s3; 1707 1708 PetscFunctionBegin; 1709 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1710 if (rr) { 1711 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1712 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1713 /* Overlap communication with computation. */ 1714 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1715 } 1716 if (ll) { 1717 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1718 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1719 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1720 } 1721 /* scale the diagonal block */ 1722 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1723 1724 if (rr) { 1725 /* Do a scatter end and then right scale the off-diagonal block */ 1726 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1727 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1728 } 1729 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1735 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1736 { 1737 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1738 PetscErrorCode ierr; 1739 1740 PetscFunctionBegin; 1741 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1742 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1743 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1744 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1745 PetscFunctionReturn(0); 1746 } 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1749 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1750 { 1751 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1752 PetscErrorCode ierr; 1753 1754 PetscFunctionBegin; 1755 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1756 PetscFunctionReturn(0); 1757 } 1758 1759 #undef __FUNCT__ 1760 #define __FUNCT__ "MatEqual_MPIAIJ" 1761 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1762 { 1763 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1764 Mat a,b,c,d; 1765 PetscTruth flg; 1766 PetscErrorCode ierr; 1767 1768 PetscFunctionBegin; 1769 a = matA->A; b = matA->B; 1770 c = matB->A; d = matB->B; 1771 1772 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1773 if (flg) { 1774 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1775 } 1776 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1777 
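  /* combine the per-process comparisons with a logical AND so that every rank
     returns the same answer, even when only some ranks see a difference in
     their local blocks */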
PetscFunctionReturn(0); 1778 } 1779 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "MatCopy_MPIAIJ" 1782 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1783 { 1784 PetscErrorCode ierr; 1785 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1786 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1787 1788 PetscFunctionBegin; 1789 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1790 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1791 /* because of the column compression in the off-processor part of the matrix a->B, 1792 the number of columns in a->B and b->B may be different, hence we cannot call 1793 the MatCopy() directly on the two parts. If need be, we can provide a more 1794 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1795 then copying the submatrices */ 1796 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1797 } else { 1798 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1799 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1800 } 1801 PetscFunctionReturn(0); 1802 } 1803 1804 #undef __FUNCT__ 1805 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1806 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1807 { 1808 PetscErrorCode ierr; 1809 1810 PetscFunctionBegin; 1811 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1812 PetscFunctionReturn(0); 1813 } 1814 1815 #undef __FUNCT__ 1816 #define __FUNCT__ "MatAXPY_MPIAIJ" 1817 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1818 { 1819 PetscErrorCode ierr; 1820 PetscInt i; 1821 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1822 PetscBLASInt bnz,one=1; 1823 Mat_SeqAIJ *x,*y; 1824 1825 PetscFunctionBegin; 1826 if (str == SAME_NONZERO_PATTERN) { 1827 PetscScalar alpha = a; 1828 x = (Mat_SeqAIJ *)xx->A->data; 1829 y = (Mat_SeqAIJ *)yy->A->data; 1830 bnz = PetscBLASIntCast(x->nz); 1831 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1832 x = (Mat_SeqAIJ *)xx->B->data; 1833 y = (Mat_SeqAIJ *)yy->B->data; 1834 bnz = PetscBLASIntCast(x->nz); 1835 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1836 } else if (str == SUBSET_NONZERO_PATTERN) { 1837 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1838 1839 x = (Mat_SeqAIJ *)xx->B->data; 1840 y = (Mat_SeqAIJ *)yy->B->data; 1841 if (y->xtoy && y->XtoY != xx->B) { 1842 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1843 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1844 } 1845 if (!y->xtoy) { /* get xtoy */ 1846 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1847 y->XtoY = xx->B; 1848 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1849 } 1850 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1851 } else { 1852 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1853 } 1854 PetscFunctionReturn(0); 1855 } 1856 1857 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatConjugate_MPIAIJ" 1861 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1862 { 1863 #if defined(PETSC_USE_COMPLEX) 1864 PetscErrorCode ierr; 1865 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1866 1867 PetscFunctionBegin; 1868 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1869 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1870 #else 1871 PetscFunctionBegin; 1872 #endif 1873 PetscFunctionReturn(0); 1874 } 1875 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatRealPart_MPIAIJ" 1878 PetscErrorCode 
MatRealPart_MPIAIJ(Mat A) 1879 { 1880 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1881 PetscErrorCode ierr; 1882 1883 PetscFunctionBegin; 1884 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1885 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1886 PetscFunctionReturn(0); 1887 } 1888 1889 #undef __FUNCT__ 1890 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1891 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1892 { 1893 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1894 PetscErrorCode ierr; 1895 1896 PetscFunctionBegin; 1897 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1898 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1899 PetscFunctionReturn(0); 1900 } 1901 1902 #ifdef PETSC_HAVE_PBGL 1903 1904 #include <boost/parallel/mpi/bsp_process_group.hpp> 1905 #include <boost/graph/distributed/ilu_default_graph.hpp> 1906 #include <boost/graph/distributed/ilu_0_block.hpp> 1907 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1908 #include <boost/graph/distributed/petsc/interface.hpp> 1909 #include <boost/multi_array.hpp> 1910 #include <boost/parallel/distributed_property_map.hpp> 1911 1912 #undef __FUNCT__ 1913 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1914 /* 1915 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1916 */ 1917 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1918 { 1919 namespace petsc = boost::distributed::petsc; 1920 1921 namespace graph_dist = boost::graph::distributed; 1922 using boost::graph::distributed::ilu_default::process_group_type; 1923 using boost::graph::ilu_permuted; 1924 1925 PetscTruth row_identity, col_identity; 1926 PetscContainer c; 1927 PetscInt m, n, M, N; 1928 PetscErrorCode ierr; 1929 1930 PetscFunctionBegin; 1931 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1932 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1933 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1934 if (!row_identity || !col_identity) { 1935 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1936 } 1937 1938 process_group_type pg; 1939 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1940 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1941 lgraph_type& level_graph = *lgraph_p; 1942 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1943 1944 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1945 ilu_permuted(level_graph); 1946 1947 /* put together the new matrix */ 1948 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1949 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1950 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1951 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1952 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1953 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1954 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1955 1956 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 1957 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 1958 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1964 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1965 { 1966 PetscFunctionBegin; 1967 PetscFunctionReturn(0); 1968 } 1969 1970 #undef __FUNCT__ 1971
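/*
   MatSolve_MPIAIJ() below performs the triangular solves for the PBGL ILU(0)
   factorization: it retrieves the "graph" container that
   MatILUFactorSymbolic_MPIAIJ() composed onto the factored matrix and passes
   the right-hand side and solution arrays to ilu_set_solve().
*/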
#define __FUNCT__ "MatSolve_MPIAIJ" 1972 /* 1973 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1974 */ 1975 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1976 { 1977 namespace graph_dist = boost::graph::distributed; 1978 1979 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1980 lgraph_type* lgraph_p; 1981 PetscContainer c; 1982 PetscErrorCode ierr; 1983 1984 PetscFunctionBegin; 1985 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1986 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1987 ierr = VecCopy(b, x);CHKERRQ(ierr); 1988 1989 PetscScalar* array_x; 1990 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1991 PetscInt sx; 1992 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1993 1994 PetscScalar* array_b; 1995 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1996 PetscInt sb; 1997 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1998 1999 lgraph_type& level_graph = *lgraph_p; 2000 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2001 2002 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2003 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2004 ref_x(array_x, boost::extents[num_vertices(graph)]); 2005 2006 typedef boost::iterator_property_map<array_ref_type::iterator, 2007 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2008 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2009 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2010 2011 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2012 2013 PetscFunctionReturn(0); 2014 } 2015 #endif 2016 2017 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2018 PetscInt nzlocal,nsends,nrecvs; 2019 PetscMPIInt *send_rank,*recv_rank; 2020 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2021 PetscScalar *sbuf_a,**rbuf_a; 2022 PetscErrorCode (*MatDestroy)(Mat); 2023 } Mat_Redundant; 2024 2025 #undef __FUNCT__ 2026 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2027 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2028 { 2029 PetscErrorCode ierr; 2030 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2031 PetscInt i; 2032 2033 PetscFunctionBegin; 2034 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2035 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2036 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2037 for (i=0; i<redund->nrecvs; i++){ 2038 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2039 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2040 } 2041 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2042 ierr = PetscFree(redund);CHKERRQ(ierr); 2043 PetscFunctionReturn(0); 2044 } 2045 2046 #undef __FUNCT__ 2047 #define __FUNCT__ "MatDestroy_MatRedundant" 2048 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2049 { 2050 PetscErrorCode ierr; 2051 PetscContainer container; 2052 Mat_Redundant *redund=PETSC_NULL; 2053 2054 PetscFunctionBegin; 2055 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2056 if (container) { 2057 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2058 } else { 2059 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2060 } 2061 A->ops->destroy = redund->MatDestroy; 2062 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2063 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2064 ierr = 
PetscContainerDestroy(container);CHKERRQ(ierr); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 #undef __FUNCT__ 2069 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2070 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2071 { 2072 PetscMPIInt rank,size; 2073 MPI_Comm comm=((PetscObject)mat)->comm; 2074 PetscErrorCode ierr; 2075 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2076 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2077 PetscInt *rowrange=mat->rmap->range; 2078 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2079 Mat A=aij->A,B=aij->B,C=*matredundant; 2080 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2081 PetscScalar *sbuf_a; 2082 PetscInt nzlocal=a->nz+b->nz; 2083 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2084 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2085 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2086 MatScalar *aworkA,*aworkB; 2087 PetscScalar *vals; 2088 PetscMPIInt tag1,tag2,tag3,imdex; 2089 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2090 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2091 MPI_Status recv_status,*send_status; 2092 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2093 PetscInt **rbuf_j=PETSC_NULL; 2094 PetscScalar **rbuf_a=PETSC_NULL; 2095 Mat_Redundant *redund=PETSC_NULL; 2096 PetscContainer container; 2097 2098 PetscFunctionBegin; 2099 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2100 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2101 2102 if (reuse == MAT_REUSE_MATRIX) { 2103 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2104 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2105 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2106 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2107 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2108 if (container) { 2109 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2110 } else { 2111 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2112 } 2113 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2114 2115 nsends = redund->nsends; 2116 nrecvs = redund->nrecvs; 2117 send_rank = redund->send_rank; 2118 recv_rank = redund->recv_rank; 2119 sbuf_nz = redund->sbuf_nz; 2120 rbuf_nz = redund->rbuf_nz; 2121 sbuf_j = redund->sbuf_j; 2122 sbuf_a = redund->sbuf_a; 2123 rbuf_j = redund->rbuf_j; 2124 rbuf_a = redund->rbuf_a; 2125 } 2126 2127 if (reuse == MAT_INITIAL_MATRIX){ 2128 PetscMPIInt subrank,subsize; 2129 PetscInt nleftover,np_subcomm; 2130 /* get the destination processors' id send_rank, nsends and nrecvs */ 2131 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2132 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2133 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2134 np_subcomm = size/nsubcomm; 2135 nleftover = size - nsubcomm*np_subcomm; 2136 nsends = 0; nrecvs = 0; 2137 for (i=0; i<size; i++){ /* i=rank*/ 2138 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2139 send_rank[nsends] = i; nsends++; 2140 recv_rank[nrecvs++] = i; 2141 } 2142 } 2143 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2144 i = size-nleftover-1; 2145 j = 0; 2146 while (j < nsubcomm - nleftover){ 2147 send_rank[nsends++] = i; 2148 i--; j++; 2149 } 2150 } 2151 2152 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2153 for (i=0; i<nleftover; i++){ 2154 recv_rank[nrecvs++] = size-nleftover+i; 2155 } 2156 } 2157 2158 /* allocate sbuf_j, sbuf_a */ 2159 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2160 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2161 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2162 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2163 2164 /* copy mat's local entries into the buffers */ 2165 if (reuse == MAT_INITIAL_MATRIX){ 2166 rownz_max = 0; 2167 rptr = sbuf_j; 2168 cols = sbuf_j + rend-rstart + 1; 2169 vals = sbuf_a; 2170 rptr[0] = 0; 2171 for (i=0; i<rend-rstart; i++){ 2172 row = i + rstart; 2173 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2174 ncols = nzA + nzB; 2175 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2176 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2177 /* load the column indices for this row into cols */ 2178 lwrite = 0; 2179 for (l=0; l<nzB; l++) { 2180 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2181 vals[lwrite] = aworkB[l]; 2182 cols[lwrite++] = ctmp; 2183 } 2184 } 2185 for (l=0; l<nzA; l++){ 2186 vals[lwrite] = aworkA[l]; 2187 cols[lwrite++] = cstart + cworkA[l]; 2188 } 2189 for (l=0; l<nzB; l++) { 2190 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2191 vals[lwrite] = aworkB[l]; 2192 cols[lwrite++] = ctmp; 2193 } 2194 } 2195 vals += ncols; 2196 cols += ncols; 2197 rptr[i+1] = rptr[i] + ncols; 2198 if (rownz_max < ncols) rownz_max = ncols; 2199 } 2200 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2201 } else { /* only copy matrix values into sbuf_a */ 2202 rptr = sbuf_j; 2203 vals = sbuf_a; 2204 rptr[0] = 0; 2205 for (i=0; i<rend-rstart; i++){ 2206 row = i + rstart; 2207 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2208 ncols = nzA + nzB; 2209 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2210 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2211 lwrite = 0; 2212 for (l=0; l<nzB; l++) { 2213 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2214 } 2215 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2216 for 
(l=0; l<nzB; l++) { 2217 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2218 } 2219 vals += ncols; 2220 rptr[i+1] = rptr[i] + ncols; 2221 } 2222 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2223 2224 /* send nzlocal to others, and recv other's nzlocal */ 2225 /*--------------------------------------------------*/ 2226 if (reuse == MAT_INITIAL_MATRIX){ 2227 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2228 s_waits2 = s_waits3 + nsends; 2229 s_waits1 = s_waits2 + nsends; 2230 r_waits1 = s_waits1 + nsends; 2231 r_waits2 = r_waits1 + nrecvs; 2232 r_waits3 = r_waits2 + nrecvs; 2233 } else { 2234 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2235 r_waits3 = s_waits3 + nsends; 2236 } 2237 2238 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2239 if (reuse == MAT_INITIAL_MATRIX){ 2240 /* get new tags to keep the communication clean */ 2241 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2242 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2243 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2244 2245 /* post receives of other's nzlocal */ 2246 for (i=0; i<nrecvs; i++){ 2247 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2248 } 2249 /* send nzlocal to others */ 2250 for (i=0; i<nsends; i++){ 2251 sbuf_nz[i] = nzlocal; 2252 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2253 } 2254 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2255 count = nrecvs; 2256 while (count) { 2257 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2258 recv_rank[imdex] = recv_status.MPI_SOURCE; 2259 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2260 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2261 2262 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2263 rbuf_nz[imdex] += i + 2; 2264 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2265 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2266 count--; 2267 } 2268 /* wait on sends of nzlocal */ 2269 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2270 /* send mat->i,j to others, and recv from other's */ 2271 /*------------------------------------------------*/ 2272 for (i=0; i<nsends; i++){ 2273 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2274 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2275 } 2276 /* wait on receives of mat->i,j */ 2277 /*------------------------------*/ 2278 count = nrecvs; 2279 while (count) { 2280 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2281 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2282 count--; 2283 } 2284 /* wait on sends of mat->i,j */ 2285 /*---------------------------*/ 2286 if (nsends) { 2287 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2288 } 2289 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2290 2291 /* post receives, send and receive mat->a */ 2292 
/*----------------------------------------*/ 2293 for (imdex=0; imdex<nrecvs; imdex++) { 2294 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2295 } 2296 for (i=0; i<nsends; i++){ 2297 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2298 } 2299 count = nrecvs; 2300 while (count) { 2301 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2302 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2303 count--; 2304 } 2305 if (nsends) { 2306 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2307 } 2308 2309 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2310 2311 /* create redundant matrix */ 2312 /*-------------------------*/ 2313 if (reuse == MAT_INITIAL_MATRIX){ 2314 /* compute rownz_max for preallocation */ 2315 for (imdex=0; imdex<nrecvs; imdex++){ 2316 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2317 rptr = rbuf_j[imdex]; 2318 for (i=0; i<j; i++){ 2319 ncols = rptr[i+1] - rptr[i]; 2320 if (rownz_max < ncols) rownz_max = ncols; 2321 } 2322 } 2323 2324 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2325 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2326 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2327 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2328 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2329 } else { 2330 C = *matredundant; 2331 } 2332 2333 /* insert local matrix entries */ 2334 rptr = sbuf_j; 2335 cols = sbuf_j + rend-rstart + 1; 2336 vals = sbuf_a; 2337 for (i=0; i<rend-rstart; i++){ 2338 row = i + rstart; 2339 ncols = rptr[i+1] - rptr[i]; 2340 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2341 vals += ncols; 2342 cols += ncols; 2343 } 2344 /* insert received matrix entries */ 2345 for (imdex=0; imdex<nrecvs; imdex++){ 2346 rstart = rowrange[recv_rank[imdex]]; 2347 rend = rowrange[recv_rank[imdex]+1]; 2348 rptr = rbuf_j[imdex]; 2349 cols = rbuf_j[imdex] + rend-rstart + 1; 2350 vals = rbuf_a[imdex]; 2351 for (i=0; i<rend-rstart; i++){ 2352 row = i + rstart; 2353 ncols = rptr[i+1] - rptr[i]; 2354 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2355 vals += ncols; 2356 cols += ncols; 2357 } 2358 } 2359 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2360 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2361 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2362 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2363 if (reuse == MAT_INITIAL_MATRIX){ 2364 PetscContainer container; 2365 *matredundant = C; 2366 /* create a supporting struct and attach it to C for reuse */ 2367 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2368 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2369 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2370 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2371 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2372 2373 redund->nzlocal = nzlocal; 2374 redund->nsends = nsends; 2375 redund->nrecvs = nrecvs; 2376 redund->send_rank = send_rank; 2377 redund->recv_rank = recv_rank; 
2378 redund->sbuf_nz = sbuf_nz; 2379 redund->rbuf_nz = rbuf_nz; 2380 redund->sbuf_j = sbuf_j; 2381 redund->sbuf_a = sbuf_a; 2382 redund->rbuf_j = rbuf_j; 2383 redund->rbuf_a = rbuf_a; 2384 2385 redund->MatDestroy = C->ops->destroy; 2386 C->ops->destroy = MatDestroy_MatRedundant; 2387 } 2388 PetscFunctionReturn(0); 2389 } 2390 2391 #undef __FUNCT__ 2392 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2393 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2394 { 2395 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2396 PetscErrorCode ierr; 2397 PetscInt i,*idxb = 0; 2398 PetscScalar *va,*vb; 2399 Vec vtmp; 2400 2401 PetscFunctionBegin; 2402 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2403 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2404 if (idx) { 2405 for (i=0; i<A->rmap->n; i++) { 2406 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2407 } 2408 } 2409 2410 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2411 if (idx) { 2412 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2413 } 2414 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2415 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2416 2417 for (i=0; i<A->rmap->n; i++){ 2418 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2419 va[i] = vb[i]; 2420 if (idx) idx[i] = a->garray[idxb[i]]; 2421 } 2422 } 2423 2424 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2425 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2426 if (idxb) { 2427 ierr = PetscFree(idxb);CHKERRQ(ierr); 2428 } 2429 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2430 PetscFunctionReturn(0); 2431 } 2432 2433 #undef __FUNCT__ 2434 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2435 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2436 { 2437 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2438 PetscErrorCode ierr; 2439 PetscInt i,*idxb = 0; 2440 PetscScalar *va,*vb; 2441 Vec vtmp; 2442 2443 PetscFunctionBegin; 2444 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2445 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2446 if (idx) { 2447 for (i=0; i<A->rmap->n; i++) { 2448 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2449 } 2450 } 2451 2452 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2453 if (idx) { 2454 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2455 } 2456 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2457 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2458 2459 for (i=0; i<A->rmap->n; i++){ 2460 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2461 va[i] = vb[i]; 2462 if (idx) idx[i] = a->garray[idxb[i]]; 2463 } 2464 } 2465 2466 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2467 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2468 if (idxb) { 2469 ierr = PetscFree(idxb);CHKERRQ(ierr); 2470 } 2471 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2472 PetscFunctionReturn(0); 2473 } 2474 2475 #undef __FUNCT__ 2476 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2477 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2478 { 2479 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2480 PetscInt n = A->rmap->n; 2481 PetscInt cstart = A->cmap->rstart; 2482 PetscInt *cmap = mat->garray; 2483 PetscInt *diagIdx, *offdiagIdx; 2484 Vec diagV, offdiagV; 2485 PetscScalar *a, *diagA, *offdiagA; 2486 PetscInt r; 2487 PetscErrorCode ierr; 2488 2489 PetscFunctionBegin; 2490 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2491 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2492 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2493 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2494 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2495 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2496 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2497 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2498 for(r = 0; r < n; ++r) { 2499 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2500 a[r] = diagA[r]; 2501 idx[r] = cstart + diagIdx[r]; 2502 } else { 2503 a[r] = offdiagA[r]; 2504 idx[r] = cmap[offdiagIdx[r]]; 2505 } 2506 } 2507 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2508 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2509 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2510 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2511 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2512 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2513 PetscFunctionReturn(0); 2514 } 2515 2516 #undef __FUNCT__ 2517 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2518 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2519 { 2520 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2521 PetscInt n = A->rmap->n; 2522 PetscInt cstart = A->cmap->rstart; 2523 PetscInt *cmap = mat->garray; 2524 PetscInt *diagIdx, *offdiagIdx; 2525 Vec diagV, offdiagV; 2526 PetscScalar *a, *diagA, *offdiagA; 2527 PetscInt r; 2528 PetscErrorCode ierr; 2529 2530 PetscFunctionBegin; 2531 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2532 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2533 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2534 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2535 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2536 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2537 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2538 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2539 for(r = 0; r < n; ++r) { 2540 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2541 a[r] = diagA[r]; 2542 idx[r] = cstart + diagIdx[r]; 2543 } else { 2544 a[r] = offdiagA[r]; 2545 idx[r] = cmap[offdiagIdx[r]]; 2546 } 2547 } 2548 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2549 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2550 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2551 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2552 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2553 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2554 PetscFunctionReturn(0); 2555 } 2556 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2559 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2560 { 2561 PetscErrorCode ierr; 2562 Mat *dummy; 2563 2564 PetscFunctionBegin; 2565 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2566 *newmat = *dummy; 2567 ierr = PetscFree(dummy);CHKERRQ(ierr); 2568 PetscFunctionReturn(0); 2569 } 2570 2571 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2572 /* -------------------------------------------------------------------*/ 2573 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2574 MatGetRow_MPIAIJ, 2575 MatRestoreRow_MPIAIJ, 2576 MatMult_MPIAIJ, 2577 /* 4*/ MatMultAdd_MPIAIJ, 2578 MatMultTranspose_MPIAIJ, 2579 MatMultTransposeAdd_MPIAIJ, 2580 #ifdef PETSC_HAVE_PBGL 2581 MatSolve_MPIAIJ, 2582 #else 2583 0, 2584 #endif 2585 0, 2586 0, 2587 /*10*/ 0, 2588 0, 2589 0, 2590 
MatSOR_MPIAIJ, 2591 MatTranspose_MPIAIJ, 2592 /*15*/ MatGetInfo_MPIAIJ, 2593 MatEqual_MPIAIJ, 2594 MatGetDiagonal_MPIAIJ, 2595 MatDiagonalScale_MPIAIJ, 2596 MatNorm_MPIAIJ, 2597 /*20*/ MatAssemblyBegin_MPIAIJ, 2598 MatAssemblyEnd_MPIAIJ, 2599 MatSetOption_MPIAIJ, 2600 MatZeroEntries_MPIAIJ, 2601 /*24*/ MatZeroRows_MPIAIJ, 2602 0, 2603 #ifdef PETSC_HAVE_PBGL 2604 0, 2605 #else 2606 0, 2607 #endif 2608 0, 2609 0, 2610 /*29*/ MatSetUpPreallocation_MPIAIJ, 2611 #ifdef PETSC_HAVE_PBGL 2612 0, 2613 #else 2614 0, 2615 #endif 2616 0, 2617 0, 2618 0, 2619 /*34*/ MatDuplicate_MPIAIJ, 2620 0, 2621 0, 2622 0, 2623 0, 2624 /*39*/ MatAXPY_MPIAIJ, 2625 MatGetSubMatrices_MPIAIJ, 2626 MatIncreaseOverlap_MPIAIJ, 2627 MatGetValues_MPIAIJ, 2628 MatCopy_MPIAIJ, 2629 /*44*/ MatGetRowMax_MPIAIJ, 2630 MatScale_MPIAIJ, 2631 0, 2632 0, 2633 0, 2634 /*49*/ MatSetBlockSize_MPIAIJ, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*54*/ MatFDColoringCreate_MPIAIJ, 2640 0, 2641 MatSetUnfactored_MPIAIJ, 2642 MatPermute_MPIAIJ, 2643 0, 2644 /*59*/ MatGetSubMatrix_MPIAIJ, 2645 MatDestroy_MPIAIJ, 2646 MatView_MPIAIJ, 2647 0, 2648 0, 2649 /*64*/ 0, 2650 0, 2651 0, 2652 0, 2653 0, 2654 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2655 MatGetRowMinAbs_MPIAIJ, 2656 0, 2657 MatSetColoring_MPIAIJ, 2658 #if defined(PETSC_HAVE_ADIC) 2659 MatSetValuesAdic_MPIAIJ, 2660 #else 2661 0, 2662 #endif 2663 MatSetValuesAdifor_MPIAIJ, 2664 /*75*/ MatFDColoringApply_AIJ, 2665 0, 2666 0, 2667 0, 2668 0, 2669 /*80*/ 0, 2670 0, 2671 0, 2672 /*83*/ MatLoad_MPIAIJ, 2673 0, 2674 0, 2675 0, 2676 0, 2677 0, 2678 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2679 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2680 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2681 MatPtAP_Basic, 2682 MatPtAPSymbolic_MPIAIJ, 2683 /*94*/ MatPtAPNumeric_MPIAIJ, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /*99*/ 0, 2689 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2690 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2691 MatConjugate_MPIAIJ, 2692 0, 2693 /*104*/MatSetValuesRow_MPIAIJ, 2694 MatRealPart_MPIAIJ, 2695 MatImaginaryPart_MPIAIJ, 2696 0, 2697 0, 2698 /*109*/0, 2699 MatGetRedundantMatrix_MPIAIJ, 2700 MatGetRowMin_MPIAIJ, 2701 0, 2702 0, 2703 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2704 0, 2705 0, 2706 0, 2707 0, 2708 0 2709 }; 2710 2711 /* ----------------------------------------------------------------------------------------*/ 2712 2713 EXTERN_C_BEGIN 2714 #undef __FUNCT__ 2715 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2716 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2717 { 2718 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2719 PetscErrorCode ierr; 2720 2721 PetscFunctionBegin; 2722 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2723 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2724 PetscFunctionReturn(0); 2725 } 2726 EXTERN_C_END 2727 2728 EXTERN_C_BEGIN 2729 #undef __FUNCT__ 2730 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2731 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2732 { 2733 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2734 PetscErrorCode ierr; 2735 2736 PetscFunctionBegin; 2737 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2738 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2739 PetscFunctionReturn(0); 2740 } 2741 EXTERN_C_END 2742 2743 #include "petscpc.h" 2744 EXTERN_C_BEGIN 2745 #undef __FUNCT__ 2746 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2747 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2748 { 2749 Mat_MPIAIJ *b; 2750 PetscErrorCode ierr; 2751 PetscInt i; 2752 2753 PetscFunctionBegin; 2754 
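  /*
     d_nz/d_nnz describe the diagonal block (columns owned by this process) and
     o_nz/o_nnz the off-diagonal block; PETSC_DEFAULT or PETSC_DECIDE fall back
     to the fixed per-row estimates chosen below.  A minimal calling sketch
     (sizes and values are illustrative only):

        ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
        ierr = MatSetSizes(B,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
        ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
        ierr = MatMPIAIJSetPreallocation(B,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  */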
if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2755 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2756 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2757 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2758 2759 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2760 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2761 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2762 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2763 if (d_nnz) { 2764 for (i=0; i<B->rmap->n; i++) { 2765 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2766 } 2767 } 2768 if (o_nnz) { 2769 for (i=0; i<B->rmap->n; i++) { 2770 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2771 } 2772 } 2773 b = (Mat_MPIAIJ*)B->data; 2774 2775 if (!B->preallocated) { 2776 /* Explicitly create 2 MATSEQAIJ matrices. */ 2777 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2778 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2779 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2780 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2781 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2782 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2783 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2784 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2785 } 2786 2787 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2788 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2789 B->preallocated = PETSC_TRUE; 2790 PetscFunctionReturn(0); 2791 } 2792 EXTERN_C_END 2793 2794 #undef __FUNCT__ 2795 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2796 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2797 { 2798 Mat mat; 2799 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2800 PetscErrorCode ierr; 2801 2802 PetscFunctionBegin; 2803 *newmat = 0; 2804 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2805 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2806 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2807 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2808 a = (Mat_MPIAIJ*)mat->data; 2809 2810 mat->factortype = matin->factortype; 2811 mat->rmap->bs = matin->rmap->bs; 2812 mat->assembled = PETSC_TRUE; 2813 mat->insertmode = NOT_SET_VALUES; 2814 mat->preallocated = PETSC_TRUE; 2815 2816 a->size = oldmat->size; 2817 a->rank = oldmat->rank; 2818 a->donotstash = oldmat->donotstash; 2819 a->roworiented = oldmat->roworiented; 2820 a->rowindices = 0; 2821 a->rowvalues = 0; 2822 a->getrowactive = PETSC_FALSE; 2823 2824 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2825 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2826 2827 if (oldmat->colmap) { 2828 #if defined (PETSC_USE_CTABLE) 2829 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2830 #else 2831 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2832 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2833 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2834 #endif 
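    /* colmap maps global column numbers to local positions in the off-diagonal
       block; it is duplicated above either as a PetscTable or as a dense array
       of length cmap->N, depending on whether PETSC_USE_CTABLE is configured */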
2835 } else a->colmap = 0; 2836 if (oldmat->garray) { 2837 PetscInt len; 2838 len = oldmat->B->cmap->n; 2839 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2840 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2841 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2842 } else a->garray = 0; 2843 2844 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2845 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2846 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2847 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2848 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2849 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2850 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2851 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2852 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2853 *newmat = mat; 2854 PetscFunctionReturn(0); 2855 } 2856 2857 #undef __FUNCT__ 2858 #define __FUNCT__ "MatLoad_MPIAIJ" 2859 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2860 { 2861 Mat A; 2862 PetscScalar *vals,*svals; 2863 MPI_Comm comm = ((PetscObject)viewer)->comm; 2864 MPI_Status status; 2865 PetscErrorCode ierr; 2866 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2867 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2868 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2869 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2870 PetscInt cend,cstart,n,*rowners; 2871 int fd; 2872 2873 PetscFunctionBegin; 2874 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2875 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2876 if (!rank) { 2877 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2878 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2879 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2880 } 2881 2882 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2883 M = header[1]; N = header[2]; 2884 /* determine ownership of all rows */ 2885 m = M/size + ((M % size) > rank); 2886 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2887 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2888 2889 /* First process needs enough room for process with most rows */ 2890 if (!rank) { 2891 mmax = rowners[1]; 2892 for (i=2; i<size; i++) { 2893 mmax = PetscMax(mmax,rowners[i]); 2894 } 2895 } else mmax = m; 2896 2897 rowners[0] = 0; 2898 for (i=2; i<=size; i++) { 2899 rowners[i] += rowners[i-1]; 2900 } 2901 rstart = rowners[rank]; 2902 rend = rowners[rank+1]; 2903 2904 /* distribute row lengths to all processors */ 2905 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2906 if (!rank) { 2907 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2908 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2909 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2910 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2911 for (j=0; j<m; j++) { 2912 procsnz[0] += ourlens[j]; 2913 } 2914 for (i=1; i<size; i++) { 2915 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2916 /* calculate the number of nonzeros on each processor */ 2917 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 
2918 procsnz[i] += rowlengths[j]; 2919 } 2920 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2921 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2922 } 2923 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2924 } else { 2925 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2926 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2927 } 2928 2929 if (!rank) { 2930 /* determine max buffer needed and allocate it */ 2931 maxnz = 0; 2932 for (i=0; i<size; i++) { 2933 maxnz = PetscMax(maxnz,procsnz[i]); 2934 } 2935 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2936 2937 /* read in my part of the matrix column indices */ 2938 nz = procsnz[0]; 2939 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2940 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2941 2942 /* read in every one elses and ship off */ 2943 for (i=1; i<size; i++) { 2944 nz = procsnz[i]; 2945 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2946 mpicnt = PetscMPIIntCast(nz); 2947 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2948 } 2949 ierr = PetscFree(cols);CHKERRQ(ierr); 2950 } else { 2951 /* determine buffer space needed for message */ 2952 nz = 0; 2953 for (i=0; i<m; i++) { 2954 nz += ourlens[i]; 2955 } 2956 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2957 2958 /* receive message of column indices*/ 2959 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2960 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2961 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2962 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 2963 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 2964 else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 2965 } 2966 2967 /* determine column ownership if matrix is not square */ 2968 if (N != M) { 2969 n = N/size + ((N % size) > rank); 2970 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2971 cstart = cend - n; 2972 } else { 2973 cstart = rstart; 2974 cend = rend; 2975 n = cend - cstart; 2976 } 2977 2978 /* loop over local rows, determining number of off diagonal entries */ 2979 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2980 jj = 0; 2981 for (i=0; i<m; i++) { 2982 for (j=0; j<ourlens[i]; j++) { 2983 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2984 jj++; 2985 } 2986 } 2987 2988 /* create our matrix */ 2989 for (i=0; i<m; i++) { 2990 ourlens[i] -= offlens[i]; 2991 } 2992 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2993 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2994 ierr = MatSetType(A,type);CHKERRQ(ierr); 2995 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2996 2997 for (i=0; i<m; i++) { 2998 ourlens[i] += offlens[i]; 2999 } 3000 3001 if (!rank) { 3002 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3003 3004 /* read in my part of the matrix numerical values */ 3005 nz = procsnz[0]; 3006 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3007 3008 /* insert into matrix */ 3009 jj = rstart; 3010 smycols = mycols; 3011 svals = vals; 3012 for (i=0; i<m; i++) { 3013 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3014 
smycols += ourlens[i]; 3015 svals += ourlens[i]; 3016 jj++; 3017 } 3018 3019 /* read in other processors and ship out */ 3020 for (i=1; i<size; i++) { 3021 nz = procsnz[i]; 3022 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3023 mpicnt = PetscMPIIntCast(nz); 3024 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3025 } 3026 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3027 } else { 3028 /* receive numeric values */ 3029 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3030 3031 /* receive message of values*/ 3032 mpicnt = PetscMPIIntCast(nz); 3033 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3034 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3035 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 3036 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 3037 else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 3038 3039 /* insert into matrix */ 3040 jj = rstart; 3041 smycols = mycols; 3042 svals = vals; 3043 for (i=0; i<m; i++) { 3044 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3045 smycols += ourlens[i]; 3046 svals += ourlens[i]; 3047 jj++; 3048 } 3049 } 3050 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3051 ierr = PetscFree(vals);CHKERRQ(ierr); 3052 ierr = PetscFree(mycols);CHKERRQ(ierr); 3053 ierr = PetscFree(rowners);CHKERRQ(ierr); 3054 3055 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3056 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3057 *newmat = A; 3058 PetscFunctionReturn(0); 3059 } 3060 3061 #undef __FUNCT__ 3062 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3063 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3064 { 3065 PetscErrorCode ierr; 3066 IS iscol_local; 3067 PetscInt csize; 3068 3069 PetscFunctionBegin; 3070 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3071 if (call == MAT_REUSE_MATRIX) { 3072 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3073 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3074 } else { 3075 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3076 } 3077 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3078 if (call == MAT_INITIAL_MATRIX) { 3079 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3080 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3081 } 3082 PetscFunctionReturn(0); 3083 } 3084 3085 #undef __FUNCT__ 3086 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3087 /* 3088 Not great since it makes two copies of the submatrix, first an SeqAIJ 3089 in local and then by concatenating the local matrices the end result. 3090 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3091 3092 Note: This requires a sequential iscol with all indices. 
3093 */ 3094 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3095 { 3096 PetscErrorCode ierr; 3097 PetscMPIInt rank,size; 3098 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3099 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3100 Mat *local,M,Mreuse; 3101 MatScalar *vwork,*aa; 3102 MPI_Comm comm = ((PetscObject)mat)->comm; 3103 Mat_SeqAIJ *aij; 3104 3105 3106 PetscFunctionBegin; 3107 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3108 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3109 3110 if (call == MAT_REUSE_MATRIX) { 3111 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3112 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3113 local = &Mreuse; 3114 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3115 } else { 3116 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3117 Mreuse = *local; 3118 ierr = PetscFree(local);CHKERRQ(ierr); 3119 } 3120 3121 /* 3122 m - number of local rows 3123 n - number of columns (same on all processors) 3124 rstart - first row in new global matrix generated 3125 */ 3126 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3127 if (call == MAT_INITIAL_MATRIX) { 3128 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3129 ii = aij->i; 3130 jj = aij->j; 3131 3132 /* 3133 Determine the number of non-zeros in the diagonal and off-diagonal 3134 portions of the matrix in order to do correct preallocation 3135 */ 3136 3137 /* first get start and end of "diagonal" columns */ 3138 if (csize == PETSC_DECIDE) { 3139 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3140 if (mglobal == n) { /* square matrix */ 3141 nlocal = m; 3142 } else { 3143 nlocal = n/size + ((n % size) > rank); 3144 } 3145 } else { 3146 nlocal = csize; 3147 } 3148 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3149 rstart = rend - nlocal; 3150 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3151 3152 /* next, compute all the lengths */ 3153 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3154 olens = dlens + m; 3155 for (i=0; i<m; i++) { 3156 jend = ii[i+1] - ii[i]; 3157 olen = 0; 3158 dlen = 0; 3159 for (j=0; j<jend; j++) { 3160 if (*jj < rstart || *jj >= rend) olen++; 3161 else dlen++; 3162 jj++; 3163 } 3164 olens[i] = olen; 3165 dlens[i] = dlen; 3166 } 3167 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3168 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3169 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3170 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3171 ierr = PetscFree(dlens);CHKERRQ(ierr); 3172 } else { 3173 PetscInt ml,nl; 3174 3175 M = *newmat; 3176 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3177 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3178 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3179 /* 3180 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3181 rather than the slower MatSetValues(). 
3182 */ 3183 M->was_assembled = PETSC_TRUE; 3184 M->assembled = PETSC_FALSE; 3185 } 3186 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3187 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3188 ii = aij->i; 3189 jj = aij->j; 3190 aa = aij->a; 3191 for (i=0; i<m; i++) { 3192 row = rstart + i; 3193 nz = ii[i+1] - ii[i]; 3194 cwork = jj; jj += nz; 3195 vwork = aa; aa += nz; 3196 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3197 } 3198 3199 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3200 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3201 *newmat = M; 3202 3203 /* save submatrix used in processor for next request */ 3204 if (call == MAT_INITIAL_MATRIX) { 3205 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3206 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3207 } 3208 3209 PetscFunctionReturn(0); 3210 } 3211 3212 EXTERN_C_BEGIN 3213 #undef __FUNCT__ 3214 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3215 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3216 { 3217 PetscInt m,cstart, cend,j,nnz,i,d; 3218 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3219 const PetscInt *JJ; 3220 PetscScalar *values; 3221 PetscErrorCode ierr; 3222 3223 PetscFunctionBegin; 3224 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3225 3226 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3227 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3228 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3229 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3230 m = B->rmap->n; 3231 cstart = B->cmap->rstart; 3232 cend = B->cmap->rend; 3233 rstart = B->rmap->rstart; 3234 3235 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3236 3237 #if defined(PETSC_USE_DEBUGGING) 3238 for (i=0; i<m; i++) { 3239 nnz = Ii[i+1]- Ii[i]; 3240 JJ = J + Ii[i]; 3241 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3242 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3243 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3244 } 3245 #endif 3246 3247 for (i=0; i<m; i++) { 3248 nnz = Ii[i+1]- Ii[i]; 3249 JJ = J + Ii[i]; 3250 nnz_max = PetscMax(nnz_max,nnz); 3251 d = 0; 3252 for (j=0; j<nnz; j++) { 3253 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3254 } 3255 d_nnz[i] = d; 3256 o_nnz[i] = nnz - d; 3257 } 3258 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3259 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3260 3261 if (v) values = (PetscScalar*)v; 3262 else { 3263 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3264 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3265 } 3266 3267 for (i=0; i<m; i++) { 3268 ii = i + rstart; 3269 nnz = Ii[i+1]- Ii[i]; 3270 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3271 } 3272 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3273 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3274 3275 if (!v) { 3276 ierr = PetscFree(values);CHKERRQ(ierr); 3277 } 3278 PetscFunctionReturn(0); 3279 } 3280 EXTERN_C_END 3281 3282 #undef __FUNCT__ 3283 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3284 /*@ 3285 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3286 (the default parallel PETSc format). 3287 3288 Collective on MPI_Comm 3289 3290 Input Parameters: 3291 + B - the matrix 3292 . i - the indices into j for the start of each local row (starts with zero) 3293 . j - the column indices for each local row (starts with zero) 3294 - v - optional values in the matrix 3295 3296 Level: developer 3297 3298 Notes: 3299 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3300 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3301 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3302 3303 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3304 3305 The format which is used for the sparse matrix input, is equivalent to a 3306 row-major ordering.. i.e for the following matrix, the input data expected is 3307 as shown: 3308 3309 1 0 0 3310 2 0 3 P0 3311 ------- 3312 4 5 6 P1 3313 3314 Process0 [P0]: rows_owned=[0,1] 3315 i = {0,1,3} [size = nrow+1 = 2+1] 3316 j = {0,0,2} [size = nz = 6] 3317 v = {1,2,3} [size = nz = 6] 3318 3319 Process1 [P1]: rows_owned=[2] 3320 i = {0,3} [size = nrow+1 = 1+1] 3321 j = {0,1,2} [size = nz = 6] 3322 v = {4,5,6} [size = nz = 6] 3323 3324 .keywords: matrix, aij, compressed row, sparse, parallel 3325 3326 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3327 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3328 @*/ 3329 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3330 { 3331 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3332 3333 PetscFunctionBegin; 3334 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3335 if (f) { 3336 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3337 } 3338 PetscFunctionReturn(0); 3339 } 3340 3341 #undef __FUNCT__ 3342 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3343 /*@C 3344 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3345 (the default parallel PETSc format). For good matrix assembly performance 3346 the user should preallocate the matrix storage by setting the parameters 3347 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3348 performance can be increased by more than a factor of 50. 3349 3350 Collective on MPI_Comm 3351 3352 Input Parameters: 3353 + A - the matrix 3354 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3355 (same value is used for all local rows) 3356 . d_nnz - array containing the number of nonzeros in the various rows of the 3357 DIAGONAL portion of the local submatrix (possibly different for each row) 3358 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3359 The size of this array is equal to the number of local rows, i.e 'm'. 
3360 You must leave room for the diagonal entry even if it is zero. 3361 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3362 submatrix (same value is used for all local rows). 3363 - o_nnz - array containing the number of nonzeros in the various rows of the 3364 OFF-DIAGONAL portion of the local submatrix (possibly different for 3365 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3366 structure. The size of this array is equal to the number 3367 of local rows, i.e 'm'. 3368 3369 If the *_nnz parameter is given then the *_nz parameter is ignored. 3370 3371 The AIJ format (also called the Yale sparse matrix format or 3372 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3373 storage. The stored row and column indices begin with zero. See the users manual for details. 3374 3375 The parallel matrix is partitioned such that the first m0 rows belong to 3376 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3377 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 3378 3379 The DIAGONAL portion of the local submatrix of a processor can be defined 3380 as the submatrix which is obtained by extracting the part corresponding 3381 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3382 first row that belongs to the processor, and r2 is the last row belonging 3383 to this processor. This is a square mxm matrix. The remaining portion 3384 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3385 3386 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3387 3388 You can call MatGetInfo() to get information on how effective the preallocation was; 3389 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3390 You can also run with the option -info and look for messages with the string 3391 malloc in them to see if additional memory allocation was needed. 3392 3393 Example usage: 3394 3395 Consider the following 8x8 matrix with 34 non-zero values, that is 3396 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3397 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3398 as follows: 3399 3400 .vb 3401 1 2 0 | 0 3 0 | 0 4 3402 Proc0 0 5 6 | 7 0 0 | 8 0 3403 9 0 10 | 11 0 0 | 12 0 3404 ------------------------------------- 3405 13 0 14 | 15 16 17 | 0 0 3406 Proc1 0 18 0 | 19 20 21 | 0 0 3407 0 0 0 | 22 23 0 | 24 0 3408 ------------------------------------- 3409 Proc2 25 26 27 | 0 0 28 | 29 0 3410 30 0 0 | 31 32 33 | 0 34 3411 .ve 3412 3413 This can be represented as a collection of submatrices as: 3414 3415 .vb 3416 A B C 3417 D E F 3418 G H I 3419 .ve 3420 3421 Where the submatrices A,B,C are owned by proc0, D,E,F are 3422 owned by proc1, G,H,I are owned by proc2. 3423 3424 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3425 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3426 The 'M','N' parameters are 8,8, and have the same values on all procs. 3427 3428 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3429 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3430 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3431 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3432 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3433 matrix, and [DF] as another SeqAIJ matrix.
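   A sketch of the matching calls on proc0 (each process passes its own local sizes and
   its own per-row counts; the d_nnz/o_nnz values for this layout are worked out below):
.vb
     Mat      A;
     PetscInt d_nnz[3],o_nnz[3];                      /* filled with this process's per-row counts */
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);     /* m=3,n=3 on proc0; M=N=8 everywhere */
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve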
3434 3435 When d_nz, o_nz parameters are specified, d_nz storage elements are 3436 allocated for every row of the local diagonal submatrix, and o_nz 3437 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3438 One way to choose d_nz and o_nz is to use the max nonzerors per local 3439 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3440 In this case, the values of d_nz,o_nz are: 3441 .vb 3442 proc0 : dnz = 2, o_nz = 2 3443 proc1 : dnz = 3, o_nz = 2 3444 proc2 : dnz = 1, o_nz = 4 3445 .ve 3446 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3447 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3448 for proc3. i.e we are using 12+15+10=37 storage locations to store 3449 34 values. 3450 3451 When d_nnz, o_nnz parameters are specified, the storage is specified 3452 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3453 In the above case the values for d_nnz,o_nnz are: 3454 .vb 3455 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3456 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3457 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3458 .ve 3459 Here the space allocated is sum of all the above values i.e 34, and 3460 hence pre-allocation is perfect. 3461 3462 Level: intermediate 3463 3464 .keywords: matrix, aij, compressed row, sparse, parallel 3465 3466 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3467 MPIAIJ, MatGetInfo() 3468 @*/ 3469 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3470 { 3471 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3472 3473 PetscFunctionBegin; 3474 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3475 if (f) { 3476 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3477 } 3478 PetscFunctionReturn(0); 3479 } 3480 3481 #undef __FUNCT__ 3482 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3483 /*@ 3484 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3485 CSR format the local rows. 3486 3487 Collective on MPI_Comm 3488 3489 Input Parameters: 3490 + comm - MPI communicator 3491 . m - number of local rows (Cannot be PETSC_DECIDE) 3492 . n - This value should be the same as the local size used in creating the 3493 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3494 calculated if N is given) For square matrices n is almost always m. 3495 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3496 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3497 . i - row indices 3498 . j - column indices 3499 - a - matrix values 3500 3501 Output Parameter: 3502 . mat - the matrix 3503 3504 Level: intermediate 3505 3506 Notes: 3507 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3508 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3509 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3510 3511 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3512 3513 The format which is used for the sparse matrix input, is equivalent to a 3514 row-major ordering.. 
i.e for the following matrix, the input data expected is 3515 as shown: 3516 3517 1 0 0 3518 2 0 3 P0 3519 ------- 3520 4 5 6 P1 3521 3522 Process0 [P0]: rows_owned=[0,1] 3523 i = {0,1,3} [size = nrow+1 = 2+1] 3524 j = {0,0,2} [size = nz = 6] 3525 v = {1,2,3} [size = nz = 6] 3526 3527 Process1 [P1]: rows_owned=[2] 3528 i = {0,3} [size = nrow+1 = 1+1] 3529 j = {0,1,2} [size = nz = 6] 3530 v = {4,5,6} [size = nz = 6] 3531 3532 .keywords: matrix, aij, compressed row, sparse, parallel 3533 3534 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3535 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3536 @*/ 3537 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3538 { 3539 PetscErrorCode ierr; 3540 3541 PetscFunctionBegin; 3542 if (i[0]) { 3543 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3544 } 3545 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3546 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3547 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3548 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3549 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3550 PetscFunctionReturn(0); 3551 } 3552 3553 #undef __FUNCT__ 3554 #define __FUNCT__ "MatCreateMPIAIJ" 3555 /*@C 3556 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3557 (the default parallel PETSc format). For good matrix assembly performance 3558 the user should preallocate the matrix storage by setting the parameters 3559 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3560 performance can be increased by more than a factor of 50. 3561 3562 Collective on MPI_Comm 3563 3564 Input Parameters: 3565 + comm - MPI communicator 3566 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3567 This value should be the same as the local size used in creating the 3568 y vector for the matrix-vector product y = Ax. 3569 . n - This value should be the same as the local size used in creating the 3570 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3571 calculated if N is given) For square matrices n is almost always m. 3572 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3573 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3574 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3575 (same value is used for all local rows) 3576 . d_nnz - array containing the number of nonzeros in the various rows of the 3577 DIAGONAL portion of the local submatrix (possibly different for each row) 3578 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3579 The size of this array is equal to the number of local rows, i.e 'm'. 3580 You must leave room for the diagonal entry even if it is zero. 3581 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3582 submatrix (same value is used for all local rows). 3583 - o_nnz - array containing the number of nonzeros in the various rows of the 3584 OFF-DIAGONAL portion of the local submatrix (possibly different for 3585 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3586 structure. The size of this array is equal to the number 3587 of local rows, i.e 'm'. 
3588 3589 Output Parameter: 3590 . A - the matrix 3591 3592 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3593 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3594 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3595 3596 Notes: 3597 If the *_nnz parameter is given then the *_nz parameter is ignored 3598 3599 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3600 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3601 storage requirements for this matrix. 3602 3603 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3604 processor than it must be used on all processors that share the object for 3605 that argument. 3606 3607 The user MUST specify either the local or global matrix dimensions 3608 (possibly both). 3609 3610 The parallel matrix is partitioned across processors such that the 3611 first m0 rows belong to process 0, the next m1 rows belong to 3612 process 1, the next m2 rows belong to process 2 etc.. where 3613 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3614 values corresponding to [m x N] submatrix. 3615 3616 The columns are logically partitioned with the n0 columns belonging 3617 to 0th partition, the next n1 columns belonging to the next 3618 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3619 3620 The DIAGONAL portion of the local submatrix on any given processor 3621 is the submatrix corresponding to the rows and columns m,n 3622 corresponding to the given processor. i.e diagonal matrix on 3623 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3624 etc. The remaining portion of the local submatrix [m x (N-n)] 3625 constitute the OFF-DIAGONAL portion. The example below better 3626 illustrates this concept. 3627 3628 For a square global matrix we define each processor's diagonal portion 3629 to be its local rows and the corresponding columns (a square submatrix); 3630 each processor's off-diagonal portion encompasses the remainder of the 3631 local matrix (a rectangular submatrix). 3632 3633 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3634 3635 When calling this routine with a single process communicator, a matrix of 3636 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3637 type of communicator, use the construction mechanism: 3638 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3639 3640 By default, this format uses inodes (identical nodes) when possible. 3641 We search for consecutive rows with the same nonzero structure, thereby 3642 reusing matrix information to achieve increased efficiency. 3643 3644 Options Database Keys: 3645 + -mat_no_inode - Do not use inodes 3646 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3647 - -mat_aij_oneindex - Internally use indexing starting at 1 3648 rather than 0. Note that when calling MatSetValues(), 3649 the user still MUST index entries starting at 0! 3650 3651 3652 Example usage: 3653 3654 Consider the following 8x8 matrix with 34 non-zero values, that is 3655 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3656 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 3657 as follows: 3658 3659 .vb 3660 1 2 0 | 0 3 0 | 0 4 3661 Proc0 0 5 6 | 7 0 0 | 8 0 3662 9 0 10 | 11 0 0 | 12 0 3663 ------------------------------------- 3664 13 0 14 | 15 16 17 | 0 0 3665 Proc1 0 18 0 | 19 20 21 | 0 0 3666 0 0 0 | 22 23 0 | 24 0 3667 ------------------------------------- 3668 Proc2 25 26 27 | 0 0 28 | 29 0 3669 30 0 0 | 31 32 33 | 0 34 3670 .ve 3671 3672 This can be represented as a collection of submatrices as: 3673 3674 .vb 3675 A B C 3676 D E F 3677 G H I 3678 .ve 3679 3680 Where the submatrices A,B,C are owned by proc0, D,E,F are 3681 owned by proc1, G,H,I are owned by proc2. 3682 3683 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3684 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3685 The 'M','N' parameters are 8,8, and have the same values on all procs. 3686 3687 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3688 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3689 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3690 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3691 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3692 matrix, and [DF] as another SeqAIJ matrix. 3693 3694 When d_nz, o_nz parameters are specified, d_nz storage elements are 3695 allocated for every row of the local diagonal submatrix, and o_nz 3696 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3697 One way to choose d_nz and o_nz is to use the max nonzeros per local 3698 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3699 In this case, the values of d_nz,o_nz are: 3700 .vb 3701 proc0 : d_nz = 2, o_nz = 2 3702 proc1 : d_nz = 3, o_nz = 2 3703 proc2 : d_nz = 1, o_nz = 4 3704 .ve 3705 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3706 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3707 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3708 34 values. 3709 3710 When d_nnz, o_nnz parameters are specified, the storage is specified 3711 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3712 In the above case the values for d_nnz,o_nnz are: 3713 .vb 3714 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3715 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3716 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3717 .ve 3718 Here the space allocated is the sum of all the above values, i.e. 34, and 3719 hence the preallocation is exact.
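   Putting this together, a sketch of the corresponding call on proc0 (each process passes
   its own local sizes and its own count arrays) is:
.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};    /* proc0's rows in the example above */
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve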
3720 3721 Level: intermediate 3722 3723 .keywords: matrix, aij, compressed row, sparse, parallel 3724 3725 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3726 MPIAIJ, MatCreateMPIAIJWithArrays() 3727 @*/ 3728 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3729 { 3730 PetscErrorCode ierr; 3731 PetscMPIInt size; 3732 3733 PetscFunctionBegin; 3734 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3735 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3736 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3737 if (size > 1) { 3738 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3739 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3740 } else { 3741 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3742 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3743 } 3744 PetscFunctionReturn(0); 3745 } 3746 3747 #undef __FUNCT__ 3748 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3749 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3750 { 3751 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3752 3753 PetscFunctionBegin; 3754 *Ad = a->A; 3755 *Ao = a->B; 3756 *colmap = a->garray; 3757 PetscFunctionReturn(0); 3758 } 3759 3760 #undef __FUNCT__ 3761 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3762 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3763 { 3764 PetscErrorCode ierr; 3765 PetscInt i; 3766 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3767 3768 PetscFunctionBegin; 3769 if (coloring->ctype == IS_COLORING_GLOBAL) { 3770 ISColoringValue *allcolors,*colors; 3771 ISColoring ocoloring; 3772 3773 /* set coloring for diagonal portion */ 3774 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3775 3776 /* set coloring for off-diagonal portion */ 3777 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3778 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3779 for (i=0; i<a->B->cmap->n; i++) { 3780 colors[i] = allcolors[a->garray[i]]; 3781 } 3782 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3783 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3784 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3785 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3786 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3787 ISColoringValue *colors; 3788 PetscInt *larray; 3789 ISColoring ocoloring; 3790 3791 /* set coloring for diagonal portion */ 3792 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3793 for (i=0; i<a->A->cmap->n; i++) { 3794 larray[i] = i + A->cmap->rstart; 3795 } 3796 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3797 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3798 for (i=0; i<a->A->cmap->n; i++) { 3799 colors[i] = coloring->colors[larray[i]]; 3800 } 3801 ierr = PetscFree(larray);CHKERRQ(ierr); 3802 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3803 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3804 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3805 3806 /* set coloring for off-diagonal portion */ 3807 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3808 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3809 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3810 for (i=0; i<a->B->cmap->n; i++) { 3811 colors[i] = coloring->colors[larray[i]]; 3812 } 3813 ierr = PetscFree(larray);CHKERRQ(ierr); 3814 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3815 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3816 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3817 } else { 3818 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3819 } 3820 3821 PetscFunctionReturn(0); 3822 } 3823 3824 #if defined(PETSC_HAVE_ADIC) 3825 #undef __FUNCT__ 3826 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3827 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3828 { 3829 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3830 PetscErrorCode ierr; 3831 3832 PetscFunctionBegin; 3833 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3834 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3835 PetscFunctionReturn(0); 3836 } 3837 #endif 3838 3839 #undef __FUNCT__ 3840 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3841 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3842 { 3843 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3844 PetscErrorCode ierr; 3845 3846 PetscFunctionBegin; 3847 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3848 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3849 PetscFunctionReturn(0); 3850 } 3851 3852 #undef __FUNCT__ 3853 #define __FUNCT__ "MatMerge" 3854 /*@ 3855 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3856 matrices from each processor 3857 3858 Collective on MPI_Comm 3859 3860 Input Parameters: 3861 + comm - the communicators the parallel matrix will live on 3862 . inmat - the input sequential matrices 3863 . n - number of local columns (or PETSC_DECIDE) 3864 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3865 3866 Output Parameter: 3867 . outmat - the parallel matrix generated 3868 3869 Level: advanced 3870 3871 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
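   A sketch of a typical first call (inmat is this process's sequential matrix; note from
   the code below that MatMerge() destroys inmat after copying its values):
.vb
     Mat outmat;
     ierr = MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve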
3872 3873 @*/ 3874 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3875 { 3876 PetscErrorCode ierr; 3877 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3878 PetscInt *indx; 3879 PetscScalar *values; 3880 3881 PetscFunctionBegin; 3882 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3883 if (scall == MAT_INITIAL_MATRIX){ 3884 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3885 if (n == PETSC_DECIDE){ 3886 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3887 } 3888 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3889 rstart -= m; 3890 3891 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3892 for (i=0;i<m;i++) { 3893 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3894 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3895 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3896 } 3897 /* This routine will ONLY return MPIAIJ type matrix */ 3898 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3899 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3900 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3901 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3902 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3903 3904 } else if (scall == MAT_REUSE_MATRIX){ 3905 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3906 } else { 3907 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3908 } 3909 3910 for (i=0;i<m;i++) { 3911 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3912 Ii = i + rstart; 3913 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3914 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3915 } 3916 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3917 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3918 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3919 3920 PetscFunctionReturn(0); 3921 } 3922 3923 #undef __FUNCT__ 3924 #define __FUNCT__ "MatFileSplit" 3925 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3926 { 3927 PetscErrorCode ierr; 3928 PetscMPIInt rank; 3929 PetscInt m,N,i,rstart,nnz; 3930 size_t len; 3931 const PetscInt *indx; 3932 PetscViewer out; 3933 char *name; 3934 Mat B; 3935 const PetscScalar *values; 3936 3937 PetscFunctionBegin; 3938 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3939 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3940 /* Should this be the type of the diagonal block of A? 
*/ 3941 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3942 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3943 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3944 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3945 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3946 for (i=0;i<m;i++) { 3947 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3948 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3949 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3950 } 3951 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3952 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3953 3954 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3955 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3956 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3957 sprintf(name,"%s.%d",outfile,rank); 3958 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3959 ierr = PetscFree(name); 3960 ierr = MatView(B,out);CHKERRQ(ierr); 3961 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3962 ierr = MatDestroy(B);CHKERRQ(ierr); 3963 PetscFunctionReturn(0); 3964 } 3965 3966 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3967 #undef __FUNCT__ 3968 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3969 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3970 { 3971 PetscErrorCode ierr; 3972 Mat_Merge_SeqsToMPI *merge; 3973 PetscContainer container; 3974 3975 PetscFunctionBegin; 3976 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3977 if (container) { 3978 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3979 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3980 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3981 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3982 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3983 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3984 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3985 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3986 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3987 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3988 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3989 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3990 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3991 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 3992 3993 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 3994 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3995 } 3996 ierr = PetscFree(merge);CHKERRQ(ierr); 3997 3998 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3999 PetscFunctionReturn(0); 4000 } 4001 4002 #include "../src/mat/utils/freespace.h" 4003 #include "petscbt.h" 4004 4005 #undef __FUNCT__ 4006 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4007 /*@C 4008 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4009 matrices from each processor 4010 4011 Collective on MPI_Comm 4012 4013 Input Parameters: 4014 + comm - the communicators the parallel matrix will live on 4015 . seqmat - the input sequential matrices 4016 . m - number of local rows (or PETSC_DECIDE) 4017 . n - number of local columns (or PETSC_DECIDE) 4018 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4019 4020 Output Parameter: 4021 . mpimat - the parallel matrix generated 4022 4023 Level: advanced 4024 4025 Notes: 4026 The dimensions of the sequential matrix in each processor MUST be the same. 
4027 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4028 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4029 @*/ 4030 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4031 { 4032 PetscErrorCode ierr; 4033 MPI_Comm comm=((PetscObject)mpimat)->comm; 4034 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4035 PetscMPIInt size,rank,taga,*len_s; 4036 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4037 PetscInt proc,m; 4038 PetscInt **buf_ri,**buf_rj; 4039 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4040 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4041 MPI_Request *s_waits,*r_waits; 4042 MPI_Status *status; 4043 MatScalar *aa=a->a; 4044 MatScalar **abuf_r,*ba_i; 4045 Mat_Merge_SeqsToMPI *merge; 4046 PetscContainer container; 4047 4048 PetscFunctionBegin; 4049 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4050 4051 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4052 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4053 4054 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4055 if (container) { 4056 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4057 } 4058 bi = merge->bi; 4059 bj = merge->bj; 4060 buf_ri = merge->buf_ri; 4061 buf_rj = merge->buf_rj; 4062 4063 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4064 owners = merge->rowmap->range; 4065 len_s = merge->len_s; 4066 4067 /* send and recv matrix values */ 4068 /*-----------------------------*/ 4069 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4070 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4071 4072 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4073 for (proc=0,k=0; proc<size; proc++){ 4074 if (!len_s[proc]) continue; 4075 i = owners[proc]; 4076 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4077 k++; 4078 } 4079 4080 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4081 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4082 ierr = PetscFree(status);CHKERRQ(ierr); 4083 4084 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4085 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4086 4087 /* insert mat values of mpimat */ 4088 /*----------------------------*/ 4089 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4090 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4091 4092 for (k=0; k<merge->nrecv; k++){ 4093 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4094 nrows = *(buf_ri_k[k]); 4095 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4096 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4097 } 4098 4099 /* set values of ba */ 4100 m = merge->rowmap->n; 4101 for (i=0; i<m; i++) { 4102 arow = owners[rank] + i; 4103 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4104 bnzi = bi[i+1] - bi[i]; 4105 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4106 4107 /* add local non-zero vals of this proc's seqmat into ba */ 4108 anzi = ai[arow+1] - ai[arow]; 4109 aj = a->j + ai[arow]; 4110 aa = a->a + ai[arow]; 4111 nextaj = 0; 4112 for (j=0; nextaj<anzi; j++){ 4113 if (*(bj_i 
+ j) == aj[nextaj]){ /* bcol == acol */ 4114 ba_i[j] += aa[nextaj++]; 4115 } 4116 } 4117 4118 /* add received vals into ba */ 4119 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4120 /* i-th row */ 4121 if (i == *nextrow[k]) { 4122 anzi = *(nextai[k]+1) - *nextai[k]; 4123 aj = buf_rj[k] + *(nextai[k]); 4124 aa = abuf_r[k] + *(nextai[k]); 4125 nextaj = 0; 4126 for (j=0; nextaj<anzi; j++){ 4127 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4128 ba_i[j] += aa[nextaj++]; 4129 } 4130 } 4131 nextrow[k]++; nextai[k]++; 4132 } 4133 } 4134 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4135 } 4136 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4137 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4138 4139 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4140 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4141 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4142 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4143 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4144 PetscFunctionReturn(0); 4145 } 4146 4147 #undef __FUNCT__ 4148 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4149 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4150 { 4151 PetscErrorCode ierr; 4152 Mat B_mpi; 4153 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4154 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4155 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4156 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4157 PetscInt len,proc,*dnz,*onz; 4158 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4159 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4160 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4161 MPI_Status *status; 4162 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4163 PetscBT lnkbt; 4164 Mat_Merge_SeqsToMPI *merge; 4165 PetscContainer container; 4166 4167 PetscFunctionBegin; 4168 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4169 4170 /* make sure it is a PETSc comm */ 4171 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4172 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4173 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4174 4175 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4176 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4177 4178 /* determine row ownership */ 4179 /*---------------------------------------------------------*/ 4180 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4181 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4182 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4183 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4184 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4185 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4186 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4187 4188 m = merge->rowmap->n; 4189 M = merge->rowmap->N; 4190 owners = merge->rowmap->range; 4191 4192 /* determine the number of messages to send, their lengths */ 4193 /*---------------------------------------------------------*/ 4194 len_s = merge->len_s; 4195 4196 len = 0; /* length of buf_si[] */ 4197 merge->nsend = 0; 4198 for (proc=0; proc<size; proc++){ 4199 len_si[proc] = 0; 4200 if (proc == rank){ 4201 len_s[proc] = 0; 4202 } else { 4203 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4204 
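/* len_s[proc] is the number of matrix entries (column indices/values), not rows, headed for [proc]; len_si[proc] is recomputed below as 2*(nrows+1), the length of the i-structure message */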
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4205 } 4206 if (len_s[proc]) { 4207 merge->nsend++; 4208 nrows = 0; 4209 for (i=owners[proc]; i<owners[proc+1]; i++){ 4210 if (ai[i+1] > ai[i]) nrows++; 4211 } 4212 len_si[proc] = 2*(nrows+1); 4213 len += len_si[proc]; 4214 } 4215 } 4216 4217 /* determine the number and length of messages to receive for ij-structure */ 4218 /*-------------------------------------------------------------------------*/ 4219 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4220 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4221 4222 /* post the Irecv of j-structure */ 4223 /*-------------------------------*/ 4224 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4225 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4226 4227 /* post the Isend of j-structure */ 4228 /*--------------------------------*/ 4229 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4230 4231 for (proc=0, k=0; proc<size; proc++){ 4232 if (!len_s[proc]) continue; 4233 i = owners[proc]; 4234 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4235 k++; 4236 } 4237 4238 /* receives and sends of j-structure are complete */ 4239 /*------------------------------------------------*/ 4240 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4241 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4242 4243 /* send and recv i-structure */ 4244 /*---------------------------*/ 4245 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4246 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4247 4248 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4249 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4250 for (proc=0,k=0; proc<size; proc++){ 4251 if (!len_s[proc]) continue; 4252 /* form outgoing message for i-structure: 4253 buf_si[0]: nrows to be sent 4254 [1:nrows]: row index (global) 4255 [nrows+1:2*nrows+1]: i-structure index 4256 */ 4257 /*-------------------------------------------*/ 4258 nrows = len_si[proc]/2 - 1; 4259 buf_si_i = buf_si + nrows+1; 4260 buf_si[0] = nrows; 4261 buf_si_i[0] = 0; 4262 nrows = 0; 4263 for (i=owners[proc]; i<owners[proc+1]; i++){ 4264 anzi = ai[i+1] - ai[i]; 4265 if (anzi) { 4266 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4267 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4268 nrows++; 4269 } 4270 } 4271 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4272 k++; 4273 buf_si += len_si[proc]; 4274 } 4275 4276 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4277 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4278 4279 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4280 for (i=0; i<merge->nrecv; i++){ 4281 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4282 } 4283 4284 ierr = PetscFree(len_si);CHKERRQ(ierr); 4285 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4286 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4287 ierr = 
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4288 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4289 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4290 ierr = PetscFree(status);CHKERRQ(ierr); 4291 4292 /* compute a local seq matrix in each processor */ 4293 /*----------------------------------------------*/ 4294 /* allocate bi array and free space for accumulating nonzero column info */ 4295 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4296 bi[0] = 0; 4297 4298 /* create and initialize a linked list */ 4299 nlnk = N+1; 4300 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4301 4302 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4303 len = 0; 4304 len = ai[owners[rank+1]] - ai[owners[rank]]; 4305 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4306 current_space = free_space; 4307 4308 /* determine symbolic info for each local row */ 4309 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4310 4311 for (k=0; k<merge->nrecv; k++){ 4312 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4313 nrows = *buf_ri_k[k]; 4314 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4315 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */ 4316 } 4317 4318 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4319 len = 0; 4320 for (i=0;i<m;i++) { 4321 bnzi = 0; 4322 /* add local non-zero cols of this proc's seqmat into lnk */ 4323 arow = owners[rank] + i; 4324 anzi = ai[arow+1] - ai[arow]; 4325 aj = a->j + ai[arow]; 4326 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4327 bnzi += nlnk; 4328 /* add received col data into lnk */ 4329 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4330 if (i == *nextrow[k]) { /* i-th row */ 4331 anzi = *(nextai[k]+1) - *nextai[k]; 4332 aj = buf_rj[k] + *nextai[k]; 4333 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4334 bnzi += nlnk; 4335 nextrow[k]++; nextai[k]++; 4336 } 4337 } 4338 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4339 4340 /* if free space is not available, make more free space */ 4341 if (current_space->local_remaining<bnzi) { 4342 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4343 nspacedouble++; 4344 } 4345 /* copy data into free space, then initialize lnk */ 4346 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4347 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4348 4349 current_space->array += bnzi; 4350 current_space->local_used += bnzi; 4351 current_space->local_remaining -= bnzi; 4352 4353 bi[i+1] = bi[i] + bnzi; 4354 } 4355 4356 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4357 4358 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4359 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4360 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4361 4362 /* create symbolic parallel matrix B_mpi */ 4363 /*---------------------------------------*/ 4364 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4365 if (n==PETSC_DECIDE) { 4366 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4367 } else { 4368 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4369 } 4370 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4371 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4372 ierr =
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4373 4374 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4375 B_mpi->assembled = PETSC_FALSE; 4376 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4377 merge->bi = bi; 4378 merge->bj = bj; 4379 merge->buf_ri = buf_ri; 4380 merge->buf_rj = buf_rj; 4381 merge->coi = PETSC_NULL; 4382 merge->coj = PETSC_NULL; 4383 merge->owners_co = PETSC_NULL; 4384 4385 /* attach the supporting struct to B_mpi for reuse */ 4386 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4387 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4388 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4389 *mpimat = B_mpi; 4390 4391 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4392 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4393 PetscFunctionReturn(0); 4394 } 4395 4396 #undef __FUNCT__ 4397 #define __FUNCT__ "MatMerge_SeqsToMPI" 4398 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4399 { 4400 PetscErrorCode ierr; 4401 4402 PetscFunctionBegin; 4403 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4404 if (scall == MAT_INITIAL_MATRIX){ 4405 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4406 } 4407 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4408 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4409 PetscFunctionReturn(0); 4410 } 4411 4412 #undef __FUNCT__ 4413 #define __FUNCT__ "MatGetLocalMat" 4414 /*@ 4415 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4416 4417 Not Collective 4418 4419 Input Parameters: 4420 + A - the matrix 4421 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4422 4423 Output Parameter: 4424 . 
A_loc - the local sequential matrix generated 4425 4426 Level: developer 4427 4428 @*/ 4429 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4430 { 4431 PetscErrorCode ierr; 4432 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4433 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4434 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4435 MatScalar *aa=a->a,*ba=b->a,*cam; 4436 PetscScalar *ca; 4437 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4438 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4439 4440 PetscFunctionBegin; 4441 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4442 if (scall == MAT_INITIAL_MATRIX){ 4443 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4444 ci[0] = 0; 4445 for (i=0; i<am; i++){ 4446 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4447 } 4448 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4449 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4450 k = 0; 4451 for (i=0; i<am; i++) { 4452 ncols_o = bi[i+1] - bi[i]; 4453 ncols_d = ai[i+1] - ai[i]; 4454 /* off-diagonal portion of A */ 4455 for (jo=0; jo<ncols_o; jo++) { 4456 col = cmap[*bj]; 4457 if (col >= cstart) break; 4458 cj[k] = col; bj++; 4459 ca[k++] = *ba++; 4460 } 4461 /* diagonal portion of A */ 4462 for (j=0; j<ncols_d; j++) { 4463 cj[k] = cstart + *aj++; 4464 ca[k++] = *aa++; 4465 } 4466 /* off-diagonal portion of A */ 4467 for (j=jo; j<ncols_o; j++) { 4468 cj[k] = cmap[*bj++]; 4469 ca[k++] = *ba++; 4470 } 4471 } 4472 /* put together the new matrix */ 4473 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4474 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4475 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4476 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4477 mat->free_a = PETSC_TRUE; 4478 mat->free_ij = PETSC_TRUE; 4479 mat->nonew = 0; 4480 } else if (scall == MAT_REUSE_MATRIX){ 4481 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4482 ci = mat->i; cj = mat->j; cam = mat->a; 4483 for (i=0; i<am; i++) { 4484 /* off-diagonal portion of A */ 4485 ncols_o = bi[i+1] - bi[i]; 4486 for (jo=0; jo<ncols_o; jo++) { 4487 col = cmap[*bj]; 4488 if (col >= cstart) break; 4489 *cam++ = *ba++; bj++; 4490 } 4491 /* diagonal portion of A */ 4492 ncols_d = ai[i+1] - ai[i]; 4493 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4494 /* off-diagonal portion of A */ 4495 for (j=jo; j<ncols_o; j++) { 4496 *cam++ = *ba++; bj++; 4497 } 4498 } 4499 } else { 4500 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4501 } 4502 4503 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4504 PetscFunctionReturn(0); 4505 } 4506 4507 #undef __FUNCT__ 4508 #define __FUNCT__ "MatGetLocalMatCondensed" 4509 /*@C 4510 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4511 4512 Not Collective 4513 4514 Input Parameters: 4515 + A - the matrix 4516 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4517 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4518 4519 Output Parameter: 4520 . 
A_loc - the local sequential matrix generated 4521 4522 Level: developer 4523 4524 @*/ 4525 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4526 { 4527 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4528 PetscErrorCode ierr; 4529 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4530 IS isrowa,iscola; 4531 Mat *aloc; 4532 4533 PetscFunctionBegin; 4534 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4535 if (!row){ 4536 start = A->rmap->rstart; end = A->rmap->rend; 4537 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4538 } else { 4539 isrowa = *row; 4540 } 4541 if (!col){ 4542 start = A->cmap->rstart; 4543 cmap = a->garray; 4544 nzA = a->A->cmap->n; 4545 nzB = a->B->cmap->n; 4546 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4547 ncols = 0; 4548 for (i=0; i<nzB; i++) { 4549 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4550 else break; 4551 } 4552 imark = i; 4553 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4554 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4555 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4556 ierr = PetscFree(idx);CHKERRQ(ierr); 4557 } else { 4558 iscola = *col; 4559 } 4560 if (scall != MAT_INITIAL_MATRIX){ 4561 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4562 aloc[0] = *A_loc; 4563 } 4564 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4565 *A_loc = aloc[0]; 4566 ierr = PetscFree(aloc);CHKERRQ(ierr); 4567 if (!row){ 4568 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4569 } 4570 if (!col){ 4571 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4572 } 4573 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4574 PetscFunctionReturn(0); 4575 } 4576 4577 #undef __FUNCT__ 4578 #define __FUNCT__ "MatGetBrowsOfAcols" 4579 /*@C 4580 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4581 4582 Collective on Mat 4583 4584 Input Parameters: 4585 + A,B - the matrices in mpiaij format 4586 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4587 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4588 4589 Output Parameter: 4590 + rowb, colb - index sets of rows and columns of B to extract 4591 . 
brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4592 - B_seq - the sequential matrix generated 4593 4594 Level: developer 4595 4596 @*/ 4597 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4598 { 4599 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4600 PetscErrorCode ierr; 4601 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4602 IS isrowb,iscolb; 4603 Mat *bseq; 4604 4605 PetscFunctionBegin; 4606 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4607 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4608 } 4609 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4610 4611 if (scall == MAT_INITIAL_MATRIX){ 4612 start = A->cmap->rstart; 4613 cmap = a->garray; 4614 nzA = a->A->cmap->n; 4615 nzB = a->B->cmap->n; 4616 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4617 ncols = 0; 4618 for (i=0; i<nzB; i++) { /* row < local row index */ 4619 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4620 else break; 4621 } 4622 imark = i; 4623 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4624 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4625 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4626 ierr = PetscFree(idx);CHKERRQ(ierr); 4627 *brstart = imark; 4628 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4629 } else { 4630 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4631 isrowb = *rowb; iscolb = *colb; 4632 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4633 bseq[0] = *B_seq; 4634 } 4635 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4636 *B_seq = bseq[0]; 4637 ierr = PetscFree(bseq);CHKERRQ(ierr); 4638 if (!rowb){ 4639 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4640 } else { 4641 *rowb = isrowb; 4642 } 4643 if (!colb){ 4644 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4645 } else { 4646 *colb = iscolb; 4647 } 4648 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4649 PetscFunctionReturn(0); 4650 } 4651 4652 #undef __FUNCT__ 4653 #define __FUNCT__ "MatGetBrowsOfAoCols" 4654 /*@C 4655 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4656 of the OFF-DIAGONAL portion of local A 4657 4658 Collective on Mat 4659 4660 Input Parameters: 4661 + A,B - the matrices in mpiaij format 4662 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4663 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4664 . 
startsj_r - similar to startsj for receives 4665 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4666 4667 Output Parameter: 4668 + B_oth - the sequential matrix generated 4669 4670 Level: developer 4671 4672 @*/ 4673 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4674 { 4675 VecScatter_MPI_General *gen_to,*gen_from; 4676 PetscErrorCode ierr; 4677 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4678 Mat_SeqAIJ *b_oth; 4679 VecScatter ctx=a->Mvctx; 4680 MPI_Comm comm=((PetscObject)ctx)->comm; 4681 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4682 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4683 PetscScalar *rvalues,*svalues; 4684 MatScalar *b_otha,*bufa,*bufA; 4685 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4686 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4687 MPI_Status *sstatus,rstatus; 4688 PetscMPIInt jj; 4689 PetscInt *cols,sbs,rbs; 4690 PetscScalar *vals; 4691 4692 PetscFunctionBegin; 4693 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4694 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4695 } 4696 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4697 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4698 4699 gen_to = (VecScatter_MPI_General*)ctx->todata; 4700 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4701 rvalues = gen_from->values; /* holds the length of receiving row */ 4702 svalues = gen_to->values; /* holds the length of sending row */ 4703 nrecvs = gen_from->n; 4704 nsends = gen_to->n; 4705 4706 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4707 srow = gen_to->indices; /* local row index to be sent */ 4708 sstarts = gen_to->starts; 4709 sprocs = gen_to->procs; 4710 sstatus = gen_to->sstatus; 4711 sbs = gen_to->bs; 4712 rstarts = gen_from->starts; 4713 rprocs = gen_from->procs; 4714 rbs = gen_from->bs; 4715 4716 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4717 if (scall == MAT_INITIAL_MATRIX){ 4718 /* i-array */ 4719 /*---------*/ 4720 /* post receives */ 4721 for (i=0; i<nrecvs; i++){ 4722 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4723 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4724 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4725 } 4726 4727 /* pack the outgoing message */ 4728 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4729 sstartsj[0] = 0; rstartsj[0] = 0; 4730 len = 0; /* total length of j or a array to be sent */ 4731 k = 0; 4732 for (i=0; i<nsends; i++){ 4733 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4734 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4735 for (j=0; j<nrows; j++) { 4736 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4737 for (l=0; l<sbs; l++){ 4738 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4739 rowlen[j*sbs+l] = ncols; 4740 len += ncols; 4741 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4742 } 4743 k++; 4744 } 4745 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4746 sstartsj[i+1] = len; /* starting point of (i+1)-th 
outgoing msg in bufj and bufa */ 4747 } 4748 /* recvs and sends of i-array are completed */ 4749 i = nrecvs; 4750 while (i--) { 4751 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4752 } 4753 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4754 4755 /* allocate buffers for sending j and a arrays */ 4756 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4757 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4758 4759 /* create i-array of B_oth */ 4760 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4761 b_othi[0] = 0; 4762 len = 0; /* total length of j or a array to be received */ 4763 k = 0; 4764 for (i=0; i<nrecvs; i++){ 4765 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4766 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4767 for (j=0; j<nrows; j++) { 4768 b_othi[k+1] = b_othi[k] + rowlen[j]; 4769 len += rowlen[j]; k++; 4770 } 4771 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4772 } 4773 4774 /* allocate space for j and a arrays of B_oth */ 4775 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4776 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4777 4778 /* j-array */ 4779 /*---------*/ 4780 /* post receives of j-array */ 4781 for (i=0; i<nrecvs; i++){ 4782 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4783 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4784 } 4785 4786 /* pack the outgoing message j-array */ 4787 k = 0; 4788 for (i=0; i<nsends; i++){ 4789 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4790 bufJ = bufj+sstartsj[i]; 4791 for (j=0; j<nrows; j++) { 4792 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4793 for (ll=0; ll<sbs; ll++){ 4794 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4795 for (l=0; l<ncols; l++){ 4796 *bufJ++ = cols[l]; 4797 } 4798 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4799 } 4800 } 4801 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4802 } 4803 4804 /* recvs and sends of j-array are completed */ 4805 i = nrecvs; 4806 while (i--) { 4807 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4808 } 4809 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4810 } else if (scall == MAT_REUSE_MATRIX){ 4811 sstartsj = *startsj; 4812 rstartsj = *startsj_r; 4813 bufa = *bufa_ptr; 4814 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4815 b_otha = b_oth->a; 4816 } else { 4817 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse: must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX"); 4818 } 4819 4820 /* a-array */ 4821 /*---------*/ 4822 /* post receives of a-array */ 4823 for (i=0; i<nrecvs; i++){ 4824 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4825 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4826 } 4827 4828 /* pack the outgoing message a-array */ 4829 k = 0; 4830 for (i=0; i<nsends; i++){ 4831 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4832 bufA = bufa+sstartsj[i]; 4833 for (j=0; j<nrows; j++) { 4834 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4835 for (ll=0; ll<sbs; ll++){ 4836 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4837 for (l=0; l<ncols; l++){ 4838 *bufA++ = vals[l]; 4839 } 4840 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4841 } 4842 } 4843 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4844 } 4845 /* recvs and sends of a-array are completed */ 4846 i = nrecvs; 4847 while (i--) { 4848 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4849 } 4850 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4851 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4852 4853 if (scall == MAT_INITIAL_MATRIX){ 4854 /* put together the new matrix */ 4855 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4856 4857 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4858 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4859 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4860 b_oth->free_a = PETSC_TRUE; 4861 b_oth->free_ij = PETSC_TRUE; 4862 b_oth->nonew = 0; 4863 4864 ierr = PetscFree(bufj);CHKERRQ(ierr); 4865 if (!startsj || !bufa_ptr){ 4866 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4867 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4868 } else { 4869 *startsj = sstartsj; 4870 *startsj_r = rstartsj; 4871 *bufa_ptr = bufa; 4872 } 4873 } 4874 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4875 PetscFunctionReturn(0); 4876 } 4877 4878 #undef __FUNCT__ 4879 #define __FUNCT__ "MatGetCommunicationStructs" 4880 /*@C 4881 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4882 4883 Not Collective 4884 4885 Input Parameters: 4886 . A - The matrix in mpiaij format 4887 4888 Output Parameter: 4889 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4890 . 
colmap - A map from global column index to local index into lvec 4891 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4892 4893 Level: developer 4894 4895 @*/ 4896 #if defined (PETSC_USE_CTABLE) 4897 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4898 #else 4899 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4900 #endif 4901 { 4902 Mat_MPIAIJ *a; 4903 4904 PetscFunctionBegin; 4905 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4906 PetscValidPointer(lvec, 2); 4907 PetscValidPointer(colmap, 3); 4908 PetscValidPointer(multScatter, 4); 4909 a = (Mat_MPIAIJ *) A->data; 4910 if (lvec) *lvec = a->lvec; 4911 if (colmap) *colmap = a->colmap; 4912 if (multScatter) *multScatter = a->Mvctx; 4913 PetscFunctionReturn(0); 4914 } 4915 4916 EXTERN_C_BEGIN 4917 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4918 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4919 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 4920 EXTERN_C_END 4921 4922 #undef __FUNCT__ 4923 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4924 /* 4925 Computes (B'*A')' since computing B*A directly is untenable 4926 4927 n p p 4928 ( ) ( ) ( ) 4929 m ( A ) * n ( B ) = m ( C ) 4930 ( ) ( ) ( ) 4931 4932 */ 4933 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4934 { 4935 PetscErrorCode ierr; 4936 Mat At,Bt,Ct; 4937 4938 PetscFunctionBegin; 4939 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4940 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4941 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4942 ierr = MatDestroy(At);CHKERRQ(ierr); 4943 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4944 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4945 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4946 PetscFunctionReturn(0); 4947 } 4948 4949 #undef __FUNCT__ 4950 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4951 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4952 { 4953 PetscErrorCode ierr; 4954 PetscInt m=A->rmap->n,n=B->cmap->n; 4955 Mat Cmat; 4956 4957 PetscFunctionBegin; 4958 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4959 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4960 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4961 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4962 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4963 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4964 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4965 *C = Cmat; 4966 PetscFunctionReturn(0); 4967 } 4968 4969 /* ----------------------------------------------------------------*/ 4970 #undef __FUNCT__ 4971 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4972 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4973 { 4974 PetscErrorCode ierr; 4975 4976 PetscFunctionBegin; 4977 if (scall == MAT_INITIAL_MATRIX){ 4978 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4979 } 4980 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4981 PetscFunctionReturn(0); 4982 } 4983 
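/*
   A minimal usage sketch (illustration only; Adense and Baij stand for an assembled
   MATMPIDENSE matrix and an assembled MATMPIAIJ matrix of compatible sizes).  User
   code normally reaches the three routines above through the generic interface

      Mat C;
      ierr = MatMatMult(Adense,Baij,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatMatMult(Adense,Baij,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);

   which is expected to dispatch to MatMatMult_MPIDense_MPIAIJ() through the
   "MatMatMult_mpidense_mpiaij_C" function composed on the AIJ matrix in
   MatCreate_MPIAIJ() below.
*/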
4984 EXTERN_C_BEGIN 4985 #if defined(PETSC_HAVE_MUMPS) 4986 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 4987 #endif 4988 #if defined(PETSC_HAVE_PASTIX) 4989 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 4990 #endif 4991 #if defined(PETSC_HAVE_SUPERLU_DIST) 4992 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 4993 #endif 4994 #if defined(PETSC_HAVE_SPOOLES) 4995 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 4996 #endif 4997 EXTERN_C_END 4998 4999 /*MC 5000 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5001 5002 Options Database Keys: 5003 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5004 5005 Level: beginner 5006 5007 .seealso: MatCreateMPIAIJ() 5008 M*/ 5009 5010 EXTERN_C_BEGIN 5011 #undef __FUNCT__ 5012 #define __FUNCT__ "MatCreate_MPIAIJ" 5013 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5014 { 5015 Mat_MPIAIJ *b; 5016 PetscErrorCode ierr; 5017 PetscMPIInt size; 5018 5019 PetscFunctionBegin; 5020 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5021 5022 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5023 B->data = (void*)b; 5024 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5025 B->rmap->bs = 1; 5026 B->assembled = PETSC_FALSE; 5027 B->mapping = 0; 5028 5029 B->insertmode = NOT_SET_VALUES; 5030 b->size = size; 5031 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5032 5033 /* build cache for off array entries formed */ 5034 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5035 b->donotstash = PETSC_FALSE; 5036 b->colmap = 0; 5037 b->garray = 0; 5038 b->roworiented = PETSC_TRUE; 5039 5040 /* stuff used for matrix vector multiply */ 5041 b->lvec = PETSC_NULL; 5042 b->Mvctx = PETSC_NULL; 5043 5044 /* stuff for MatGetRow() */ 5045 b->rowindices = 0; 5046 b->rowvalues = 0; 5047 b->getrowactive = PETSC_FALSE; 5048 5049 #if defined(PETSC_HAVE_SPOOLES) 5050 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5051 "MatGetFactor_mpiaij_spooles", 5052 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5053 #endif 5054 #if defined(PETSC_HAVE_MUMPS) 5055 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5056 "MatGetFactor_aij_mumps", 5057 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5058 #endif 5059 #if defined(PETSC_HAVE_PASTIX) 5060 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5061 "MatGetFactor_mpiaij_pastix", 5062 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5063 #endif 5064 #if defined(PETSC_HAVE_SUPERLU_DIST) 5065 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5066 "MatGetFactor_mpiaij_superlu_dist", 5067 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5068 #endif 5069 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5070 "MatStoreValues_MPIAIJ", 5071 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5072 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5073 "MatRetrieveValues_MPIAIJ", 5074 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5075 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5076 "MatGetDiagonalBlock_MPIAIJ", 5077 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5078 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5079 "MatIsTranspose_MPIAIJ", 5080 
MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5081 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5082 "MatMPIAIJSetPreallocation_MPIAIJ", 5083 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5084 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5085 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5086 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5087 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5088 "MatDiagonalScaleLocal_MPIAIJ", 5089 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5090 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5091 "MatConvert_MPIAIJ_MPICSRPERM", 5092 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5093 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5094 "MatConvert_MPIAIJ_MPICRL", 5095 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5096 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5097 "MatConvert_MPIAIJ_MPISBAIJ", 5098 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5099 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5100 "MatMatMult_MPIDense_MPIAIJ", 5101 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5102 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5103 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5104 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5105 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5106 "MatMatMultNumeric_MPIDense_MPIAIJ", 5107 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5108 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5109 PetscFunctionReturn(0); 5110 } 5111 EXTERN_C_END 5112 5113 #undef __FUNCT__ 5114 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5115 /*@ 5116 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5117 and "off-diagonal" part of the matrix in CSR format. 5118 5119 Collective on MPI_Comm 5120 5121 Input Parameters: 5122 + comm - MPI communicator 5123 . m - number of local rows (Cannot be PETSC_DECIDE) 5124 . n - This value should be the same as the local size used in creating the 5125 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5126 calculated if N is given) For square matrices n is almost always m. 5127 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5128 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5129 . i - row indices for "diagonal" portion of matrix 5130 . j - column indices 5131 . a - matrix values 5132 . oi - row indices for "off-diagonal" portion of matrix 5133 . oj - column indices 5134 - oa - matrix values 5135 5136 Output Parameter: 5137 . mat - the matrix 5138 5139 Level: advanced 5140 5141 Notes: 5142 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5143 5144 The i and j indices are 0 based 5145 5146 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5147 5148 This sets local rows and cannot be used to set off-processor values. 5149 5150 You cannot later use MatSetValues() to change values in this matrix. 
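   Example usage (a schematic sketch, not from the original manual page; m, n, the CSR arrays
   i, j, a, oi, oj, oa and the vectors x, y are placeholders the calling code must supply):

      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ierr = MatMult(A,x,y);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);

   Since the arrays are not copied, they must remain valid for the lifetime of the matrix and
   are not freed by PETSc when the matrix is destroyed.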
5151 5152 .keywords: matrix, aij, compressed row, sparse, parallel 5153 5154 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5155 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5156 @*/ 5157 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5158 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5159 { 5160 PetscErrorCode ierr; 5161 Mat_MPIAIJ *maij; 5162 5163 PetscFunctionBegin; 5164 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5165 if (i[0]) { 5166 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5167 } 5168 if (oi[0]) { 5169 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5170 } 5171 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5172 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5173 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5174 maij = (Mat_MPIAIJ*) (*mat)->data; 5175 maij->donotstash = PETSC_TRUE; 5176 (*mat)->preallocated = PETSC_TRUE; 5177 5178 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5179 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5180 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5181 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5182 5183 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5184 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5185 5186 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5187 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5188 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5189 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5190 5191 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5192 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5193 PetscFunctionReturn(0); 5194 } 5195 5196 /* 5197 Special version for direct calls from Fortran 5198 */ 5199 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5200 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5201 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5202 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5203 #endif 5204 5205 /* Change these macros so can be used in void function */ 5206 #undef CHKERRQ 5207 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5208 #undef SETERRQ2 5209 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5210 #undef SETERRQ 5211 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5212 5213 EXTERN_C_BEGIN 5214 #undef __FUNCT__ 5215 #define __FUNCT__ "matsetvaluesmpiaij_" 5216 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5217 { 5218 Mat mat = *mmat; 5219 PetscInt m = *mm, n = *mn; 5220 InsertMode addv = *maddv; 5221 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5222 PetscScalar value; 5223 PetscErrorCode ierr; 5224 5225 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5226 if (mat->insertmode == NOT_SET_VALUES) { 5227 mat->insertmode = addv; 5228 } 5229 #if defined(PETSC_USE_DEBUG) 5230 else if (mat->insertmode != addv) { 5231 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5232 } 5233 #endif 5234 { 5235 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5236 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5237 PetscTruth roworiented = aij->roworiented; 5238 5239 /* Some Variables required in the macro */ 5240 Mat A = aij->A; 5241 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5242 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5243 MatScalar *aa = a->a; 5244 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5245 Mat B = aij->B; 5246 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5247 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5248 MatScalar *ba = b->a; 5249 5250 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5251 PetscInt nonew = a->nonew; 5252 MatScalar *ap1,*ap2; 5253 5254 PetscFunctionBegin; 5255 for (i=0; i<m; i++) { 5256 if (im[i] < 0) continue; 5257 #if defined(PETSC_USE_DEBUG) 5258 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5259 #endif 5260 if (im[i] >= rstart && im[i] < rend) { 5261 row = im[i] - rstart; 5262 lastcol1 = -1; 5263 rp1 = aj + ai[row]; 5264 ap1 = aa + ai[row]; 5265 rmax1 = aimax[row]; 5266 nrow1 = ailen[row]; 5267 low1 = 0; 5268 high1 = nrow1; 5269 lastcol2 = -1; 5270 rp2 = bj + bi[row]; 5271 ap2 = ba + bi[row]; 5272 rmax2 = bimax[row]; 5273 nrow2 = bilen[row]; 5274 low2 = 0; 5275 high2 = nrow2; 5276 5277 for (j=0; j<n; j++) { 5278 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5279 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5280 if (in[j] >= cstart && in[j] < cend){ 5281 col = in[j] - cstart; 5282 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5283 } else if (in[j] < 0) continue; 5284 #if defined(PETSC_USE_DEBUG) 5285 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5286 #endif 5287 else { 5288 if (mat->was_assembled) { 5289 if (!aij->colmap) { 5290 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5291 } 5292 #if defined (PETSC_USE_CTABLE) 5293 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5294 col--; 5295 #else 5296 col = aij->colmap[in[j]] - 1; 5297 #endif 5298 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5299 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5300 col = in[j]; 5301 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5302 B = aij->B; 5303 b = (Mat_SeqAIJ*)B->data; 5304 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5305 rp2 = bj + bi[row]; 5306 ap2 = ba + bi[row]; 5307 rmax2 = bimax[row]; 5308 nrow2 = bilen[row]; 5309 low2 = 0; 5310 high2 = nrow2; 5311 bm = aij->B->rmap->n; 5312 ba = b->a; 5313 } 5314 } else col = in[j]; 5315 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5316 } 5317 } 5318 } else { 5319 if (!aij->donotstash) { 5320 if (roworiented) { 5321 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5322 } else { 5323 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5324 } 5325 } 5326 } 5327 }} 5328 PetscFunctionReturnVoid(); 5329 } 5330 EXTERN_C_END 5331 5332
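/*
   A minimal usage sketch (illustration only; M, N, row, ncols, cols, vals and the
   preallocation estimates of 5 diagonal and 2 off-diagonal nonzeros per row are
   placeholders).  Typical user code creates and fills a MATMPIAIJ matrix as

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   which exercises MatCreate_MPIAIJ(), MatMPIAIJSetPreallocation_MPIAIJ() and the
   MatSetValues/assembly routines implemented for MPIAIJ in this file.
*/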