#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Collective on comm. Process 0 holds the full SeqAIJ matrix gmat and sends each
    process its m local rows (row lengths, column indices, then values). With
    MAT_INITIAL_MATRIX a new parallel AIJ matrix is created and preallocated; with
    reuse, only the numerical values are re-sent and copied into the existing
    diagonal (A) and off-diagonal (B) blocks of *inmat.
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* only process 0 actually has gmat, so only it can type-check it */
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    /* build row-ownership ranges: prefix sum of the local row counts of all ranks */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts; ld[i] counts entries left of rstart */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* process 0 uses gmat's own storage directly (not freed below) */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens temporarily holds only the diagonal-block counts */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to full row lengths and insert the received rows */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    /* stash ld so the MAT_REUSE_MATRIX path below can split values into A and B */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata = (Mat_SeqAIJ*) gmat->data;
      ierr  = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat;
       each CSR row arrives as [left-of-diagonal B part | A part | right-of-diagonal B part],
       with ld[] giving the left-of-diagonal count per row */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right-of-diagonal tail of row i-1 plus left-of-diagonal head of row i */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* right-of-diagonal tail of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access).
 */
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;   /* number of off-diagonal (B) columns on this process */

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* hash table maps global column + 1 -> local column + 1 (offset avoids storing 0) */
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  /* dense array of length N: colmap[global col] = local col + 1, 0 means "not present" */
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/*
   Insert (row,col,value) into the diagonal (A) block of an MPIAIJ matrix.
   Expects the caller to have set up rp1/ap1 (column/value arrays of the row),
   nrow1/rmax1 (used/allocated length), low1/high1 (binary-search window) and
   lastcol1 (previous column, to reuse the search window for sorted input).
   Falls through to label a_noinsert when the value is dropped or updated in place.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value; \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      } \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
/*
   Insert (row,col,value) into the off-diagonal (B) block of an MPIAIJ matrix.
   Mirror image of MatSetValues_SeqAIJ_A_Private using the rp2/ap2/nrow2/...
   working variables and label b_noinsert.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value;  \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Copies an entire locally owned row of values (given in global column order)
   directly into the A and B blocks, bypassing MatSetValues() searches.
   The caller's v[] must match the row's existing nonzero pattern exactly.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   Inserts/adds an m-by-n logically dense block of values. Locally owned rows go
   straight into the A (diagonal) or B (off-diagonal) sequential blocks via the
   macros above; rows owned by other processes are stashed for communication
   during assembly (unless aij->donotstash). Negative row/column indices are
   silently skipped.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: prime the macro working variables for row */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            /* column not in the current B pattern: must disassemble and fall back
               to global column numbering (unless new nonzeros are forbidden) */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* row owned elsewhere: stash for assembly-time communication */
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   Retrieves an m-by-n block of values into v (row-major). Only locally owned
   rows are supported; negative indices are skipped. Off-diagonal columns not
   present in the local B pattern yield 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: translate global column to local B column */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  /* start shipping stashed (non-local) entries to their owners */
  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Finishes assembly: drains the stash into local blocks, assembles A and B,
   and handles collective disassembly so all processes agree on the B pattern.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
/* Zeroes all entries by zeroing both the diagonal and off-diagonal blocks. */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
555 PetscFunctionReturn(0); 556 } 557 558 #undef __FUNCT__ 559 #define __FUNCT__ "MatZeroRows_MPIAIJ" 560 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 561 { 562 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 563 PetscErrorCode ierr; 564 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 565 PetscInt i,*owners = A->rmap->range; 566 PetscInt *nprocs,j,idx,nsends,row; 567 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 568 PetscInt *rvalues,count,base,slen,*source; 569 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 570 MPI_Comm comm = ((PetscObject)A)->comm; 571 MPI_Request *send_waits,*recv_waits; 572 MPI_Status recv_status,*send_status; 573 #if defined(PETSC_DEBUG) 574 PetscTruth found = PETSC_FALSE; 575 #endif 576 577 PetscFunctionBegin; 578 /* first count number of contributors to each processor */ 579 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 580 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 581 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 582 j = 0; 583 for (i=0; i<N; i++) { 584 if (lastidx > (idx = rows[i])) j = 0; 585 lastidx = idx; 586 for (; j<size; j++) { 587 if (idx >= owners[j] && idx < owners[j+1]) { 588 nprocs[2*j]++; 589 nprocs[2*j+1] = 1; 590 owner[i] = j; 591 #if defined(PETSC_DEBUG) 592 found = PETSC_TRUE; 593 #endif 594 break; 595 } 596 } 597 #if defined(PETSC_DEBUG) 598 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 599 found = PETSC_FALSE; 600 #endif 601 } 602 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 603 604 /* inform other processors of number of messages and max length*/ 605 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 606 607 /* post receives: */ 608 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 609 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 
610 for (i=0; i<nrecvs; i++) { 611 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 612 } 613 614 /* do sends: 615 1) starts[i] gives the starting index in svalues for stuff going to 616 the ith processor 617 */ 618 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 619 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 620 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 621 starts[0] = 0; 622 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 623 for (i=0; i<N; i++) { 624 svalues[starts[owner[i]]++] = rows[i]; 625 } 626 627 starts[0] = 0; 628 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 629 count = 0; 630 for (i=0; i<size; i++) { 631 if (nprocs[2*i+1]) { 632 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 633 } 634 } 635 ierr = PetscFree(starts);CHKERRQ(ierr); 636 637 base = owners[rank]; 638 639 /* wait on receives */ 640 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 641 count = nrecvs; slen = 0; 642 while (count) { 643 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 644 /* unpack receives into our local space */ 645 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 646 source[imdex] = recv_status.MPI_SOURCE; 647 lens[imdex] = n; 648 slen += n; 649 count--; 650 } 651 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 652 653 /* move the data into the send scatter */ 654 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 655 count = 0; 656 for (i=0; i<nrecvs; i++) { 657 values = rvalues + i*nmax; 658 for (j=0; j<lens[i]; j++) { 659 lrows[count++] = values[j] - base; 660 } 661 } 662 ierr = PetscFree(rvalues);CHKERRQ(ierr); 663 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 664 ierr = PetscFree(owner);CHKERRQ(ierr); 665 ierr = PetscFree(nprocs);CHKERRQ(ierr); 666 667 /* actually 
zap the local rows */ 668 /* 669 Zero the required rows. If the "diagonal block" of the matrix 670 is square and the user wishes to set the diagonal we use separate 671 code so that MatSetValues() is not called for each diagonal allocating 672 new memory, thus calling lots of mallocs and slowing things down. 673 674 */ 675 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 676 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 677 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 678 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 679 } else if (diag != 0.0) { 680 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 681 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 682 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 683 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 684 } 685 for (i = 0; i < slen; i++) { 686 row = lrows[i] + rstart; 687 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 688 } 689 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 690 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 691 } else { 692 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 693 } 694 ierr = PetscFree(lrows);CHKERRQ(ierr); 695 696 /* wait on sends */ 697 if (nsends) { 698 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 699 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 700 ierr = PetscFree(send_status);CHKERRQ(ierr); 701 } 702 ierr = PetscFree(send_waits);CHKERRQ(ierr); 703 ierr = PetscFree(svalues);CHKERRQ(ierr); 704 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatMult_MPIAIJ" 710 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 711 { 712 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 713 PetscErrorCode ierr; 714 PetscInt nt; 715 716 PetscFunctionBegin; 717 ierr = 
VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 718 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 719 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 720 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 721 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 722 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 723 PetscFunctionReturn(0); 724 } 725 726 #undef __FUNCT__ 727 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 728 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 729 { 730 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 731 PetscErrorCode ierr; 732 733 PetscFunctionBegin; 734 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 735 PetscFunctionReturn(0); 736 } 737 738 #undef __FUNCT__ 739 #define __FUNCT__ "MatMultAdd_MPIAIJ" 740 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 741 { 742 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 743 PetscErrorCode ierr; 744 745 PetscFunctionBegin; 746 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 747 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 748 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 749 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 750 PetscFunctionReturn(0); 751 } 752 753 #undef __FUNCT__ 754 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 755 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 756 { 757 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 758 PetscErrorCode ierr; 759 PetscTruth merged; 760 761 PetscFunctionBegin; 762 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 763 /* do nondiagonal part */ 764 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 765 if (!merged) { 766 /* send it on its way */ 767 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
   Tests whether Bmat == Amat^T (to tolerance tol). First compares the diagonal
   blocks locally; if they match and size > 1, extracts the complementary
   off-diagonal submatrices on each process and compares those too.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = all global rows NOT owned by this process */
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
/* z = y + A^T*x, overlapping the reverse scatter with the local transpose product. */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a =
(Mat_MPIAIJ*)A->data; 858 859 PetscFunctionBegin; 860 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 861 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 862 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatScale_MPIAIJ" 868 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 869 { 870 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 871 PetscErrorCode ierr; 872 873 PetscFunctionBegin; 874 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 875 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 876 PetscFunctionReturn(0); 877 } 878 879 #undef __FUNCT__ 880 #define __FUNCT__ "MatDestroy_MPIAIJ" 881 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 882 { 883 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 884 PetscErrorCode ierr; 885 886 PetscFunctionBegin; 887 #if defined(PETSC_USE_LOG) 888 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 889 #endif 890 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 891 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 892 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 893 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 894 #if defined (PETSC_USE_CTABLE) 895 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 896 #else 897 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 898 #endif 899 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 900 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 901 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 902 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 903 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 904 ierr = PetscFree(aij);CHKERRQ(ierr); 905 906 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 907 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 908 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 909 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 910 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 911 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 912 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 913 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 914 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 915 PetscFunctionReturn(0); 916 } 917 918 #undef __FUNCT__ 919 #define __FUNCT__ "MatView_MPIAIJ_Binary" 920 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 921 { 922 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 923 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 924 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 925 PetscErrorCode ierr; 926 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 927 int fd; 928 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 929 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 930 PetscScalar *column_values; 931 932 PetscFunctionBegin; 933 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 934 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 935 nz = A->nz + B->nz; 936 if (!rank) { 937 header[0] = MAT_FILE_CLASSID; 938 header[1] = mat->rmap->N; 939 header[2] = mat->cmap->N; 940 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 941 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 942 ierr = 
PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 943 /* get largest number of rows any processor has */ 944 rlen = mat->rmap->n; 945 range = mat->rmap->range; 946 for (i=1; i<size; i++) { 947 rlen = PetscMax(rlen,range[i+1] - range[i]); 948 } 949 } else { 950 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 951 rlen = mat->rmap->n; 952 } 953 954 /* load up the local row counts */ 955 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 956 for (i=0; i<mat->rmap->n; i++) { 957 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 958 } 959 960 /* store the row lengths to the file */ 961 if (!rank) { 962 MPI_Status status; 963 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 964 for (i=1; i<size; i++) { 965 rlen = range[i+1] - range[i]; 966 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 967 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 968 } 969 } else { 970 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 971 } 972 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 973 974 /* load up the local column indices */ 975 nzmax = nz; /* )th processor needs space a largest processor needs */ 976 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 977 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 978 cnt = 0; 979 for (i=0; i<mat->rmap->n; i++) { 980 for (j=B->i[i]; j<B->i[i+1]; j++) { 981 if ( (col = garray[B->j[j]]) > cstart) break; 982 column_indices[cnt++] = col; 983 } 984 for (k=A->i[i]; k<A->i[i+1]; k++) { 985 column_indices[cnt++] = A->j[k] + cstart; 986 } 987 for (; j<B->i[i+1]; j++) { 988 column_indices[cnt++] = garray[B->j[j]]; 989 } 990 } 991 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = 
%D",cnt,A->nz+B->nz); 992 993 /* store the column indices to the file */ 994 if (!rank) { 995 MPI_Status status; 996 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 997 for (i=1; i<size; i++) { 998 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 999 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1000 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1001 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1002 } 1003 } else { 1004 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1005 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1006 } 1007 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1008 1009 /* load up the local column values */ 1010 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1011 cnt = 0; 1012 for (i=0; i<mat->rmap->n; i++) { 1013 for (j=B->i[i]; j<B->i[i+1]; j++) { 1014 if ( garray[B->j[j]] > cstart) break; 1015 column_values[cnt++] = B->a[j]; 1016 } 1017 for (k=A->i[i]; k<A->i[i+1]; k++) { 1018 column_values[cnt++] = A->a[k]; 1019 } 1020 for (; j<B->i[i+1]; j++) { 1021 column_values[cnt++] = B->a[j]; 1022 } 1023 } 1024 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1025 1026 /* store the column values to the file */ 1027 if (!rank) { 1028 MPI_Status status; 1029 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1030 for (i=1; i<size; i++) { 1031 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1032 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1033 ierr = 
MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1034 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1035 } 1036 } else { 1037 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1038 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1039 } 1040 ierr = PetscFree(column_values);CHKERRQ(ierr); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1046 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1047 { 1048 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1049 PetscErrorCode ierr; 1050 PetscMPIInt rank = aij->rank,size = aij->size; 1051 PetscTruth isdraw,iascii,isbinary; 1052 PetscViewer sviewer; 1053 PetscViewerFormat format; 1054 1055 PetscFunctionBegin; 1056 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1057 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1058 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1059 if (iascii) { 1060 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1061 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1062 MatInfo info; 1063 PetscTruth inodes; 1064 1065 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1066 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1067 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1068 if (!inodes) { 1069 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1070 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1071 } else { 1072 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node 
routines\n", 1073 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1074 } 1075 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1076 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1077 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1079 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1080 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1081 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1082 PetscFunctionReturn(0); 1083 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1084 PetscInt inodecount,inodelimit,*inodes; 1085 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1086 if (inodes) { 1087 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1088 } else { 1089 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1090 } 1091 PetscFunctionReturn(0); 1092 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1093 PetscFunctionReturn(0); 1094 } 1095 } else if (isbinary) { 1096 if (size == 1) { 1097 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1098 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1099 } else { 1100 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1101 } 1102 PetscFunctionReturn(0); 1103 } else if (isdraw) { 1104 PetscDraw draw; 1105 PetscTruth isnull; 1106 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1107 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1108 } 1109 1110 if (size == 1) { 1111 ierr = 
PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1112 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1113 } else { 1114 /* assemble the entire matrix onto first processor. */ 1115 Mat A; 1116 Mat_SeqAIJ *Aloc; 1117 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1118 MatScalar *a; 1119 1120 if (mat->rmap->N > 1024) { 1121 PetscTruth flg = PETSC_FALSE; 1122 1123 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1124 if (!flg) { 1125 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1126 } 1127 } 1128 1129 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1130 if (!rank) { 1131 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1132 } else { 1133 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1134 } 1135 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1136 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1137 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1138 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1139 1140 /* copy over the A part */ 1141 Aloc = (Mat_SeqAIJ*)aij->A->data; 1142 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1143 row = mat->rmap->rstart; 1144 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1145 for (i=0; i<m; i++) { 1146 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1147 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1148 } 1149 aj = Aloc->j; 1150 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1151 1152 /* copy over the B part */ 1153 Aloc = (Mat_SeqAIJ*)aij->B->data; 1154 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1155 row = mat->rmap->rstart; 1156 ierr = 
PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1157 ct = cols; 1158 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1159 for (i=0; i<m; i++) { 1160 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1161 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1162 } 1163 ierr = PetscFree(ct);CHKERRQ(ierr); 1164 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1165 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1166 /* 1167 Everyone has to call to draw the matrix since the graphics waits are 1168 synchronized across all processors that share the PetscDraw object 1169 */ 1170 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1171 if (!rank) { 1172 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1173 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1174 } 1175 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1176 ierr = MatDestroy(A);CHKERRQ(ierr); 1177 } 1178 PetscFunctionReturn(0); 1179 } 1180 1181 #undef __FUNCT__ 1182 #define __FUNCT__ "MatView_MPIAIJ" 1183 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1184 { 1185 PetscErrorCode ierr; 1186 PetscTruth iascii,isdraw,issocket,isbinary; 1187 1188 PetscFunctionBegin; 1189 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1190 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1191 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1192 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1193 if (iascii || isdraw || isbinary || issocket) { 1194 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1195 } else { 1196 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1197 } 1198 PetscFunctionReturn(0); 1199 } 
1200 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatSOR_MPIAIJ" 1203 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1204 { 1205 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1206 PetscErrorCode ierr; 1207 Vec bb1 = 0; 1208 PetscTruth hasop; 1209 1210 PetscFunctionBegin; 1211 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1212 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1213 } 1214 1215 if (flag == SOR_APPLY_UPPER) { 1216 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 1220 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1221 if (flag & SOR_ZERO_INITIAL_GUESS) { 1222 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1223 its--; 1224 } 1225 1226 while (its--) { 1227 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1228 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 1230 /* update rhs: bb1 = bb - B*x */ 1231 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1232 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1233 1234 /* local sweep */ 1235 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1236 } 1237 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1238 if (flag & SOR_ZERO_INITIAL_GUESS) { 1239 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1240 its--; 1241 } 1242 while (its--) { 1243 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1244 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 1246 /* update rhs: bb1 = bb - B*x */ 1247 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1248 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1249 1250 
/* local sweep */ 1251 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1252 } 1253 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1254 if (flag & SOR_ZERO_INITIAL_GUESS) { 1255 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1256 its--; 1257 } 1258 while (its--) { 1259 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1260 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1261 1262 /* update rhs: bb1 = bb - B*x */ 1263 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1264 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1265 1266 /* local sweep */ 1267 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1268 } 1269 } else if (flag & SOR_EISENSTAT) { 1270 Vec xx1; 1271 1272 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1273 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1274 1275 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1276 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1277 if (!mat->diag) { 1278 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1279 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1280 } 1281 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1282 if (hasop) { 1283 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1284 } else { 1285 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1286 } 1287 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1288 1289 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1290 1291 /* local sweep */ 1292 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1293 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1294 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1295 } else { 1296 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1297 } 1298 1299 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatPermute_MPIAIJ" 1305 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1306 { 1307 MPI_Comm comm,pcomm; 1308 PetscInt first,local_size,nrows; 1309 const PetscInt *rows; 1310 PetscMPIInt size; 1311 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1312 PetscErrorCode ierr; 1313 1314 PetscFunctionBegin; 1315 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1316 /* make a collective version of 'rowp' */ 1317 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1318 if (pcomm==comm) { 1319 crowp = rowp; 1320 } else { 1321 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1322 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1323 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1324 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1325 } 1326 /* collect the global row permutation and invert it */ 1327 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1328 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1329 if (pcomm!=comm) { 1330 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1331 } 1332 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1333 /* get the local target indices */ 1334 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1335 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1336 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1337 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1338 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1339 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1340 /* the column permutation is so much easier; 1341 make a local version of 
'colp' and invert it */ 1342 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1343 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1344 if (size==1) { 1345 lcolp = colp; 1346 } else { 1347 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1348 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1349 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1350 } 1351 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1352 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1353 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1354 if (size>1) { 1355 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1356 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1357 } 1358 /* now we just get the submatrix */ 1359 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1360 /* clean up */ 1361 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1362 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1368 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1369 { 1370 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1371 Mat A = mat->A,B = mat->B; 1372 PetscErrorCode ierr; 1373 PetscReal isend[5],irecv[5]; 1374 1375 PetscFunctionBegin; 1376 info->block_size = 1.0; 1377 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1378 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1379 isend[3] = info->memory; isend[4] = info->mallocs; 1380 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1381 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1382 isend[3] += info->memory; isend[4] += info->mallocs; 1383 if (flag == MAT_LOCAL) { 1384 info->nz_used = isend[0]; 1385 info->nz_allocated = isend[1]; 1386 info->nz_unneeded = isend[2]; 1387 info->memory = isend[3]; 1388 info->mallocs = isend[4]; 1389 } else if (flag == MAT_GLOBAL_MAX) { 1390 ierr = 
MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1391 info->nz_used = irecv[0]; 1392 info->nz_allocated = irecv[1]; 1393 info->nz_unneeded = irecv[2]; 1394 info->memory = irecv[3]; 1395 info->mallocs = irecv[4]; 1396 } else if (flag == MAT_GLOBAL_SUM) { 1397 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1398 info->nz_used = irecv[0]; 1399 info->nz_allocated = irecv[1]; 1400 info->nz_unneeded = irecv[2]; 1401 info->memory = irecv[3]; 1402 info->mallocs = irecv[4]; 1403 } 1404 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1405 info->fill_ratio_needed = 0; 1406 info->factor_mallocs = 0; 1407 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatSetOption_MPIAIJ" 1413 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1414 { 1415 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1416 PetscErrorCode ierr; 1417 1418 PetscFunctionBegin; 1419 switch (op) { 1420 case MAT_NEW_NONZERO_LOCATIONS: 1421 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1422 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1423 case MAT_KEEP_NONZERO_PATTERN: 1424 case MAT_NEW_NONZERO_LOCATION_ERR: 1425 case MAT_USE_INODES: 1426 case MAT_IGNORE_ZERO_ENTRIES: 1427 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1428 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1429 break; 1430 case MAT_ROW_ORIENTED: 1431 a->roworiented = flg; 1432 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1433 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1434 break; 1435 case MAT_NEW_DIAGONALS: 1436 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1437 break; 1438 case MAT_IGNORE_OFF_PROC_ENTRIES: 1439 a->donotstash = PETSC_TRUE; 1440 break; 1441 case MAT_SPD: 1442 A->spd_set = PETSC_TRUE; 1443 A->spd = flg; 1444 if (flg) { 1445 A->symmetric = PETSC_TRUE; 1446 A->structurally_symmetric = PETSC_TRUE; 1447 A->symmetric_set = PETSC_TRUE; 1448 A->structurally_symmetric_set = 
PETSC_TRUE; 1449 } 1450 break; 1451 case MAT_SYMMETRIC: 1452 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1453 break; 1454 case MAT_STRUCTURALLY_SYMMETRIC: 1455 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1456 break; 1457 case MAT_HERMITIAN: 1458 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1459 break; 1460 case MAT_SYMMETRY_ETERNAL: 1461 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1462 break; 1463 default: 1464 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1465 } 1466 PetscFunctionReturn(0); 1467 } 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatGetRow_MPIAIJ" 1471 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1472 { 1473 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1474 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1475 PetscErrorCode ierr; 1476 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1477 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1478 PetscInt *cmap,*idx_p; 1479 1480 PetscFunctionBegin; 1481 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1482 mat->getrowactive = PETSC_TRUE; 1483 1484 if (!mat->rowvalues && (idx || v)) { 1485 /* 1486 allocate enough space to hold information from the longest row. 
1487 */ 1488 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1489 PetscInt max = 1,tmp; 1490 for (i=0; i<matin->rmap->n; i++) { 1491 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1492 if (max < tmp) { max = tmp; } 1493 } 1494 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1495 } 1496 1497 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1498 lrow = row - rstart; 1499 1500 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1501 if (!v) {pvA = 0; pvB = 0;} 1502 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1503 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1504 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1505 nztot = nzA + nzB; 1506 1507 cmap = mat->garray; 1508 if (v || idx) { 1509 if (nztot) { 1510 /* Sort by increasing column numbers, assuming A and B already sorted */ 1511 PetscInt imark = -1; 1512 if (v) { 1513 *v = v_p = mat->rowvalues; 1514 for (i=0; i<nzB; i++) { 1515 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1516 else break; 1517 } 1518 imark = i; 1519 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1520 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1521 } 1522 if (idx) { 1523 *idx = idx_p = mat->rowindices; 1524 if (imark > -1) { 1525 for (i=0; i<imark; i++) { 1526 idx_p[i] = cmap[cworkB[i]]; 1527 } 1528 } else { 1529 for (i=0; i<nzB; i++) { 1530 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1531 else break; 1532 } 1533 imark = i; 1534 } 1535 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1536 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1537 } 1538 } else { 1539 if (idx) *idx = 0; 1540 if (v) *v = 0; 1541 } 1542 } 1543 *nz = nztot; 1544 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1545 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1546 PetscFunctionReturn(0); 
1547 } 1548 1549 #undef __FUNCT__ 1550 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1551 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1552 { 1553 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1554 1555 PetscFunctionBegin; 1556 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1557 aij->getrowactive = PETSC_FALSE; 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "MatNorm_MPIAIJ" 1563 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1564 { 1565 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1566 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1567 PetscErrorCode ierr; 1568 PetscInt i,j,cstart = mat->cmap->rstart; 1569 PetscReal sum = 0.0; 1570 MatScalar *v; 1571 1572 PetscFunctionBegin; 1573 if (aij->size == 1) { 1574 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1575 } else { 1576 if (type == NORM_FROBENIUS) { 1577 v = amat->a; 1578 for (i=0; i<amat->nz; i++) { 1579 #if defined(PETSC_USE_COMPLEX) 1580 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1581 #else 1582 sum += (*v)*(*v); v++; 1583 #endif 1584 } 1585 v = bmat->a; 1586 for (i=0; i<bmat->nz; i++) { 1587 #if defined(PETSC_USE_COMPLEX) 1588 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1589 #else 1590 sum += (*v)*(*v); v++; 1591 #endif 1592 } 1593 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1594 *norm = sqrt(*norm); 1595 } else if (type == NORM_1) { /* max column norm */ 1596 PetscReal *tmp,*tmp2; 1597 PetscInt *jj,*garray = aij->garray; 1598 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1599 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1600 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1601 *norm = 0.0; 1602 v = amat->a; jj = amat->j; 1603 for (j=0; j<amat->nz; j++) { 1604 tmp[cstart + 
*jj++ ] += PetscAbsScalar(*v); v++; 1605 } 1606 v = bmat->a; jj = bmat->j; 1607 for (j=0; j<bmat->nz; j++) { 1608 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1609 } 1610 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1611 for (j=0; j<mat->cmap->N; j++) { 1612 if (tmp2[j] > *norm) *norm = tmp2[j]; 1613 } 1614 ierr = PetscFree(tmp);CHKERRQ(ierr); 1615 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1616 } else if (type == NORM_INFINITY) { /* max row norm */ 1617 PetscReal ntemp = 0.0; 1618 for (j=0; j<aij->A->rmap->n; j++) { 1619 v = amat->a + amat->i[j]; 1620 sum = 0.0; 1621 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1622 sum += PetscAbsScalar(*v); v++; 1623 } 1624 v = bmat->a + bmat->i[j]; 1625 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1626 sum += PetscAbsScalar(*v); v++; 1627 } 1628 if (sum > ntemp) ntemp = sum; 1629 } 1630 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1631 } else { 1632 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1633 } 1634 } 1635 PetscFunctionReturn(0); 1636 } 1637 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "MatTranspose_MPIAIJ" 1640 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1641 { 1642 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1643 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1644 PetscErrorCode ierr; 1645 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1646 PetscInt cstart=A->cmap->rstart,ncol; 1647 Mat B; 1648 MatScalar *array; 1649 1650 PetscFunctionBegin; 1651 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1652 1653 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1654 ai = Aloc->i; aj = Aloc->j; 1655 bi = Bloc->i; bj = Bloc->j; 1656 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1657 /* compute d_nnz for 
preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1658 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1659 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1660 for (i=0; i<ai[ma]; i++){ 1661 d_nnz[aj[i]] ++; 1662 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1663 } 1664 1665 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1666 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1667 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1668 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1669 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1670 } else { 1671 B = *matout; 1672 } 1673 1674 /* copy over the A part */ 1675 array = Aloc->a; 1676 row = A->rmap->rstart; 1677 for (i=0; i<ma; i++) { 1678 ncol = ai[i+1]-ai[i]; 1679 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1680 row++; array += ncol; aj += ncol; 1681 } 1682 aj = Aloc->j; 1683 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1684 1685 /* copy over the B part */ 1686 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1687 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1688 array = Bloc->a; 1689 row = A->rmap->rstart; 1690 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1691 cols_tmp = cols; 1692 for (i=0; i<mb; i++) { 1693 ncol = bi[i+1]-bi[i]; 1694 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1695 row++; array += ncol; cols_tmp += ncol; 1696 } 1697 ierr = PetscFree(cols);CHKERRQ(ierr); 1698 1699 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1701 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1702 *matout = B; 1703 } else { 1704 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1705 } 1706 PetscFunctionReturn(0); 1707 } 1708 1709 #undef __FUNCT__ 1710 #define __FUNCT__ 
"MatDiagonalScale_MPIAIJ" 1711 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1712 { 1713 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1714 Mat a = aij->A,b = aij->B; 1715 PetscErrorCode ierr; 1716 PetscInt s1,s2,s3; 1717 1718 PetscFunctionBegin; 1719 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1720 if (rr) { 1721 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1722 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1723 /* Overlap communication with computation. */ 1724 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1725 } 1726 if (ll) { 1727 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1728 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1729 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1730 } 1731 /* scale the diagonal block */ 1732 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1733 1734 if (rr) { 1735 /* Do a scatter end and then right scale the off-diagonal block */ 1736 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1737 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1738 } 1739 1740 PetscFunctionReturn(0); 1741 } 1742 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1745 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1746 { 1747 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1748 PetscErrorCode ierr; 1749 1750 PetscFunctionBegin; 1751 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1752 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1753 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1754 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1755 PetscFunctionReturn(0); 1756 } 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1759 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1760 { 1761 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1762 PetscErrorCode ierr; 1763 1764 
PetscFunctionBegin; 1765 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1766 PetscFunctionReturn(0); 1767 } 1768 1769 #undef __FUNCT__ 1770 #define __FUNCT__ "MatEqual_MPIAIJ" 1771 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1772 { 1773 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1774 Mat a,b,c,d; 1775 PetscTruth flg; 1776 PetscErrorCode ierr; 1777 1778 PetscFunctionBegin; 1779 a = matA->A; b = matA->B; 1780 c = matB->A; d = matB->B; 1781 1782 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1783 if (flg) { 1784 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1785 } 1786 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1787 PetscFunctionReturn(0); 1788 } 1789 1790 #undef __FUNCT__ 1791 #define __FUNCT__ "MatCopy_MPIAIJ" 1792 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1793 { 1794 PetscErrorCode ierr; 1795 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1796 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1797 1798 PetscFunctionBegin; 1799 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1800 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1801 /* because of the column compression in the off-processor part of the matrix a->B, 1802 the number of columns in a->B and b->B may be different, hence we cannot call 1803 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1804 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1805 then copying the submatrices */ 1806 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1807 } else { 1808 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1809 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1810 } 1811 PetscFunctionReturn(0); 1812 } 1813 1814 #undef __FUNCT__ 1815 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1816 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1817 { 1818 PetscErrorCode ierr; 1819 1820 PetscFunctionBegin; 1821 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1822 PetscFunctionReturn(0); 1823 } 1824 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatAXPY_MPIAIJ" 1827 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1828 { 1829 PetscErrorCode ierr; 1830 PetscInt i; 1831 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1832 PetscBLASInt bnz,one=1; 1833 Mat_SeqAIJ *x,*y; 1834 1835 PetscFunctionBegin; 1836 if (str == SAME_NONZERO_PATTERN) { 1837 PetscScalar alpha = a; 1838 x = (Mat_SeqAIJ *)xx->A->data; 1839 y = (Mat_SeqAIJ *)yy->A->data; 1840 bnz = PetscBLASIntCast(x->nz); 1841 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1842 x = (Mat_SeqAIJ *)xx->B->data; 1843 y = (Mat_SeqAIJ *)yy->B->data; 1844 bnz = PetscBLASIntCast(x->nz); 1845 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1846 } else if (str == SUBSET_NONZERO_PATTERN) { 1847 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1848 1849 x = (Mat_SeqAIJ *)xx->B->data; 1850 y = (Mat_SeqAIJ *)yy->B->data; 1851 if (y->xtoy && y->XtoY != xx->B) { 1852 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1853 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1854 } 1855 if (!y->xtoy) { /* get xtoy */ 1856 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1857 y->XtoY = xx->B; 1858 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1859 } 
1860 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1861 } else { 1862 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1863 } 1864 PetscFunctionReturn(0); 1865 } 1866 1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatConjugate_MPIAIJ" 1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1872 { 1873 #if defined(PETSC_USE_COMPLEX) 1874 PetscErrorCode ierr; 1875 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1876 1877 PetscFunctionBegin; 1878 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1879 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1880 #else 1881 PetscFunctionBegin; 1882 #endif 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatRealPart_MPIAIJ" 1888 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1889 { 1890 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1891 PetscErrorCode ierr; 1892 1893 PetscFunctionBegin; 1894 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1895 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1901 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1902 { 1903 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1904 PetscErrorCode ierr; 1905 1906 PetscFunctionBegin; 1907 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1908 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 #ifdef PETSC_HAVE_PBGL 1913 1914 #include <boost/parallel/mpi/bsp_process_group.hpp> 1915 #include <boost/graph/distributed/ilu_default_graph.hpp> 1916 #include <boost/graph/distributed/ilu_0_block.hpp> 1917 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1918 #include <boost/graph/distributed/petsc/interface.hpp> 1919 #include <boost/multi_array.hpp> 1920 #include <boost/parallel/distributed_property_map->hpp> 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1924 /* 1925 This uses the parallel 
ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1926 */ 1927 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1928 { 1929 namespace petsc = boost::distributed::petsc; 1930 1931 namespace graph_dist = boost::graph::distributed; 1932 using boost::graph::distributed::ilu_default::process_group_type; 1933 using boost::graph::ilu_permuted; 1934 1935 PetscTruth row_identity, col_identity; 1936 PetscContainer c; 1937 PetscInt m, n, M, N; 1938 PetscErrorCode ierr; 1939 1940 PetscFunctionBegin; 1941 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1942 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1943 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1944 if (!row_identity || !col_identity) { 1945 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1946 } 1947 1948 process_group_type pg; 1949 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1950 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1951 lgraph_type& level_graph = *lgraph_p; 1952 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1953 1954 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1955 ilu_permuted(level_graph); 1956 1957 /* put together the new matrix */ 1958 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1959 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1960 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1961 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1962 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1963 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1964 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1965 1966 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1967 ierr = PetscContainerSetPointer(c, lgraph_p); 1968 
ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1974 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1975 { 1976 PetscFunctionBegin; 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #undef __FUNCT__ 1981 #define __FUNCT__ "MatSolve_MPIAIJ" 1982 /* 1983 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1984 */ 1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1986 { 1987 namespace graph_dist = boost::graph::distributed; 1988 1989 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1990 lgraph_type* lgraph_p; 1991 PetscContainer c; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1996 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1997 ierr = VecCopy(b, x);CHKERRQ(ierr); 1998 1999 PetscScalar* array_x; 2000 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2001 PetscInt sx; 2002 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2003 2004 PetscScalar* array_b; 2005 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2006 PetscInt sb; 2007 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2008 2009 lgraph_type& level_graph = *lgraph_p; 2010 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2011 2012 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2013 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2014 ref_x(array_x, boost::extents[num_vertices(graph)]); 2015 2016 typedef boost::iterator_property_map<array_ref_type::iterator, 2017 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2018 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2019 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2020 2021 ilu_set_solve(*lgraph_p, 
vector_b, vector_x); 2022 2023 PetscFunctionReturn(0); 2024 } 2025 #endif 2026 2027 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2028 PetscInt nzlocal,nsends,nrecvs; 2029 PetscMPIInt *send_rank,*recv_rank; 2030 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2031 PetscScalar *sbuf_a,**rbuf_a; 2032 PetscErrorCode (*MatDestroy)(Mat); 2033 } Mat_Redundant; 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2038 { 2039 PetscErrorCode ierr; 2040 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2041 PetscInt i; 2042 2043 PetscFunctionBegin; 2044 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2045 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2046 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2047 for (i=0; i<redund->nrecvs; i++){ 2048 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2049 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2050 } 2051 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2052 ierr = PetscFree(redund);CHKERRQ(ierr); 2053 PetscFunctionReturn(0); 2054 } 2055 2056 #undef __FUNCT__ 2057 #define __FUNCT__ "MatDestroy_MatRedundant" 2058 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2059 { 2060 PetscErrorCode ierr; 2061 PetscContainer container; 2062 Mat_Redundant *redund=PETSC_NULL; 2063 2064 PetscFunctionBegin; 2065 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2066 if (container) { 2067 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2068 } else { 2069 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2070 } 2071 A->ops->destroy = redund->MatDestroy; 2072 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2073 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2074 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2075 
PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2081 { 2082 PetscMPIInt rank,size; 2083 MPI_Comm comm=((PetscObject)mat)->comm; 2084 PetscErrorCode ierr; 2085 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2086 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2087 PetscInt *rowrange=mat->rmap->range; 2088 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2089 Mat A=aij->A,B=aij->B,C=*matredundant; 2090 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2091 PetscScalar *sbuf_a; 2092 PetscInt nzlocal=a->nz+b->nz; 2093 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2094 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2095 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2096 MatScalar *aworkA,*aworkB; 2097 PetscScalar *vals; 2098 PetscMPIInt tag1,tag2,tag3,imdex; 2099 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2100 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2101 MPI_Status recv_status,*send_status; 2102 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2103 PetscInt **rbuf_j=PETSC_NULL; 2104 PetscScalar **rbuf_a=PETSC_NULL; 2105 Mat_Redundant *redund=PETSC_NULL; 2106 PetscContainer container; 2107 2108 PetscFunctionBegin; 2109 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2110 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2111 2112 if (reuse == MAT_REUSE_MATRIX) { 2113 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2114 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2115 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2116 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2117 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2118 if (container) { 2119 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2120 } else { 2121 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2122 } 2123 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2124 2125 nsends = redund->nsends; 2126 nrecvs = redund->nrecvs; 2127 send_rank = redund->send_rank; 2128 recv_rank = redund->recv_rank; 2129 sbuf_nz = redund->sbuf_nz; 2130 rbuf_nz = redund->rbuf_nz; 2131 sbuf_j = redund->sbuf_j; 2132 sbuf_a = redund->sbuf_a; 2133 rbuf_j = redund->rbuf_j; 2134 rbuf_a = redund->rbuf_a; 2135 } 2136 2137 if (reuse == MAT_INITIAL_MATRIX){ 2138 PetscMPIInt subrank,subsize; 2139 PetscInt nleftover,np_subcomm; 2140 /* get the destination processors' id send_rank, nsends and nrecvs */ 2141 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2142 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2143 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2144 np_subcomm = size/nsubcomm; 2145 nleftover = size - nsubcomm*np_subcomm; 2146 nsends = 0; nrecvs = 0; 2147 for (i=0; i<size; i++){ /* i=rank*/ 2148 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2149 send_rank[nsends] = i; nsends++; 2150 recv_rank[nrecvs++] = i; 2151 } 2152 } 2153 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2154 i = size-nleftover-1; 2155 j = 0; 2156 while (j < nsubcomm - nleftover){ 2157 send_rank[nsends++] = i; 2158 i--; j++; 2159 } 2160 } 2161 2162 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2163 for (i=0; i<nleftover; i++){ 2164 recv_rank[nrecvs++] = size-nleftover+i; 2165 } 2166 } 2167 2168 /* allocate sbuf_j, sbuf_a */ 2169 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2170 
ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2171 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2172 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2173 2174 /* copy mat's local entries into the buffers */ 2175 if (reuse == MAT_INITIAL_MATRIX){ 2176 rownz_max = 0; 2177 rptr = sbuf_j; 2178 cols = sbuf_j + rend-rstart + 1; 2179 vals = sbuf_a; 2180 rptr[0] = 0; 2181 for (i=0; i<rend-rstart; i++){ 2182 row = i + rstart; 2183 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2184 ncols = nzA + nzB; 2185 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2186 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2187 /* load the column indices for this row into cols */ 2188 lwrite = 0; 2189 for (l=0; l<nzB; l++) { 2190 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2191 vals[lwrite] = aworkB[l]; 2192 cols[lwrite++] = ctmp; 2193 } 2194 } 2195 for (l=0; l<nzA; l++){ 2196 vals[lwrite] = aworkA[l]; 2197 cols[lwrite++] = cstart + cworkA[l]; 2198 } 2199 for (l=0; l<nzB; l++) { 2200 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2201 vals[lwrite] = aworkB[l]; 2202 cols[lwrite++] = ctmp; 2203 } 2204 } 2205 vals += ncols; 2206 cols += ncols; 2207 rptr[i+1] = rptr[i] + ncols; 2208 if (rownz_max < ncols) rownz_max = ncols; 2209 } 2210 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2211 } else { /* only copy matrix values into sbuf_a */ 2212 rptr = sbuf_j; 2213 vals = sbuf_a; 2214 rptr[0] = 0; 2215 for (i=0; i<rend-rstart; i++){ 2216 row = i + rstart; 2217 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2218 ncols = nzA + nzB; 2219 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2220 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2221 lwrite = 0; 2222 for (l=0; l<nzB; l++) { 2223 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2224 } 2225 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2226 for (l=0; l<nzB; l++) { 2227 
if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2228 } 2229 vals += ncols; 2230 rptr[i+1] = rptr[i] + ncols; 2231 } 2232 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2233 2234 /* send nzlocal to others, and recv other's nzlocal */ 2235 /*--------------------------------------------------*/ 2236 if (reuse == MAT_INITIAL_MATRIX){ 2237 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2238 s_waits2 = s_waits3 + nsends; 2239 s_waits1 = s_waits2 + nsends; 2240 r_waits1 = s_waits1 + nsends; 2241 r_waits2 = r_waits1 + nrecvs; 2242 r_waits3 = r_waits2 + nrecvs; 2243 } else { 2244 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2245 r_waits3 = s_waits3 + nsends; 2246 } 2247 2248 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2249 if (reuse == MAT_INITIAL_MATRIX){ 2250 /* get new tags to keep the communication clean */ 2251 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2252 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2253 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2254 2255 /* post receives of other's nzlocal */ 2256 for (i=0; i<nrecvs; i++){ 2257 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2258 } 2259 /* send nzlocal to others */ 2260 for (i=0; i<nsends; i++){ 2261 sbuf_nz[i] = nzlocal; 2262 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2263 } 2264 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2265 count = nrecvs; 2266 while (count) { 2267 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2268 recv_rank[imdex] = recv_status.MPI_SOURCE; 2269 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2270 ierr = 
PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2271 2272 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2273 rbuf_nz[imdex] += i + 2; 2274 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2275 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2276 count--; 2277 } 2278 /* wait on sends of nzlocal */ 2279 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2280 /* send mat->i,j to others, and recv from other's */ 2281 /*------------------------------------------------*/ 2282 for (i=0; i<nsends; i++){ 2283 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2284 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2285 } 2286 /* wait on receives of mat->i,j */ 2287 /*------------------------------*/ 2288 count = nrecvs; 2289 while (count) { 2290 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2291 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2292 count--; 2293 } 2294 /* wait on sends of mat->i,j */ 2295 /*---------------------------*/ 2296 if (nsends) { 2297 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2298 } 2299 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2300 2301 /* post receives, send and receive mat->a */ 2302 /*----------------------------------------*/ 2303 for (imdex=0; imdex<nrecvs; imdex++) { 2304 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2305 } 2306 for (i=0; i<nsends; i++){ 2307 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2308 } 2309 count = nrecvs; 2310 while (count) { 2311 ierr = 
MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2312 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2313 count--; 2314 } 2315 if (nsends) { 2316 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2317 } 2318 2319 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2320 2321 /* create redundant matrix */ 2322 /*-------------------------*/ 2323 if (reuse == MAT_INITIAL_MATRIX){ 2324 /* compute rownz_max for preallocation */ 2325 for (imdex=0; imdex<nrecvs; imdex++){ 2326 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2327 rptr = rbuf_j[imdex]; 2328 for (i=0; i<j; i++){ 2329 ncols = rptr[i+1] - rptr[i]; 2330 if (rownz_max < ncols) rownz_max = ncols; 2331 } 2332 } 2333 2334 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2335 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2336 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2337 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2338 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2339 } else { 2340 C = *matredundant; 2341 } 2342 2343 /* insert local matrix entries */ 2344 rptr = sbuf_j; 2345 cols = sbuf_j + rend-rstart + 1; 2346 vals = sbuf_a; 2347 for (i=0; i<rend-rstart; i++){ 2348 row = i + rstart; 2349 ncols = rptr[i+1] - rptr[i]; 2350 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2351 vals += ncols; 2352 cols += ncols; 2353 } 2354 /* insert received matrix entries */ 2355 for (imdex=0; imdex<nrecvs; imdex++){ 2356 rstart = rowrange[recv_rank[imdex]]; 2357 rend = rowrange[recv_rank[imdex]+1]; 2358 rptr = rbuf_j[imdex]; 2359 cols = rbuf_j[imdex] + rend-rstart + 1; 2360 vals = rbuf_a[imdex]; 2361 for (i=0; i<rend-rstart; i++){ 2362 row = i + rstart; 2363 ncols = rptr[i+1] - rptr[i]; 2364 ierr = 
MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2365 vals += ncols; 2366 cols += ncols; 2367 } 2368 } 2369 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2370 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2371 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2372 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2373 if (reuse == MAT_INITIAL_MATRIX){ 2374 PetscContainer container; 2375 *matredundant = C; 2376 /* create a supporting struct and attach it to C for reuse */ 2377 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2378 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2379 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2380 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2381 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2382 2383 redund->nzlocal = nzlocal; 2384 redund->nsends = nsends; 2385 redund->nrecvs = nrecvs; 2386 redund->send_rank = send_rank; 2387 redund->recv_rank = recv_rank; 2388 redund->sbuf_nz = sbuf_nz; 2389 redund->rbuf_nz = rbuf_nz; 2390 redund->sbuf_j = sbuf_j; 2391 redund->sbuf_a = sbuf_a; 2392 redund->rbuf_j = rbuf_j; 2393 redund->rbuf_a = rbuf_a; 2394 2395 redund->MatDestroy = C->ops->destroy; 2396 C->ops->destroy = MatDestroy_MatRedundant; 2397 } 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2404 { 2405 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2406 PetscErrorCode ierr; 2407 PetscInt i,*idxb = 0; 2408 PetscScalar *va,*vb; 2409 Vec vtmp; 2410 2411 PetscFunctionBegin; 2412 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2413 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2414 if (idx) { 2415 for (i=0; i<A->rmap->n; i++) { 
2416 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2417 } 2418 } 2419 2420 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2421 if (idx) { 2422 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2423 } 2424 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2425 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2426 2427 for (i=0; i<A->rmap->n; i++){ 2428 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2429 va[i] = vb[i]; 2430 if (idx) idx[i] = a->garray[idxb[i]]; 2431 } 2432 } 2433 2434 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2435 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2436 if (idxb) { 2437 ierr = PetscFree(idxb);CHKERRQ(ierr); 2438 } 2439 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 #undef __FUNCT__ 2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2446 { 2447 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2448 PetscErrorCode ierr; 2449 PetscInt i,*idxb = 0; 2450 PetscScalar *va,*vb; 2451 Vec vtmp; 2452 2453 PetscFunctionBegin; 2454 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2455 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2456 if (idx) { 2457 for (i=0; i<A->cmap->n; i++) { 2458 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2459 } 2460 } 2461 2462 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2463 if (idx) { 2464 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2465 } 2466 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2467 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2468 2469 for (i=0; i<A->rmap->n; i++){ 2470 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2471 va[i] = vb[i]; 2472 if (idx) idx[i] = a->garray[idxb[i]]; 2473 } 2474 } 2475 2476 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2477 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2478 if (idxb) { 2479 ierr = PetscFree(idxb);CHKERRQ(ierr); 2480 } 2481 ierr = 
VecDestroy(vtmp);CHKERRQ(ierr); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2488 { 2489 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2490 PetscInt n = A->rmap->n; 2491 PetscInt cstart = A->cmap->rstart; 2492 PetscInt *cmap = mat->garray; 2493 PetscInt *diagIdx, *offdiagIdx; 2494 Vec diagV, offdiagV; 2495 PetscScalar *a, *diagA, *offdiagA; 2496 PetscInt r; 2497 PetscErrorCode ierr; 2498 2499 PetscFunctionBegin; 2500 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2501 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2502 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2503 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2504 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2505 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2506 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2507 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2508 for(r = 0; r < n; ++r) { 2509 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2510 a[r] = diagA[r]; 2511 idx[r] = cstart + diagIdx[r]; 2512 } else { 2513 a[r] = offdiagA[r]; 2514 idx[r] = cmap[offdiagIdx[r]]; 2515 } 2516 } 2517 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2518 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2519 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2520 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2521 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2522 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2529 { 2530 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2531 PetscInt n = A->rmap->n; 2532 PetscInt cstart = A->cmap->rstart; 2533 PetscInt *cmap = mat->garray; 2534 PetscInt 
*diagIdx, *offdiagIdx; 2535 Vec diagV, offdiagV; 2536 PetscScalar *a, *diagA, *offdiagA; 2537 PetscInt r; 2538 PetscErrorCode ierr; 2539 2540 PetscFunctionBegin; 2541 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2542 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2543 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2544 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2545 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2546 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2547 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2548 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2549 for(r = 0; r < n; ++r) { 2550 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2551 a[r] = diagA[r]; 2552 idx[r] = cstart + diagIdx[r]; 2553 } else { 2554 a[r] = offdiagA[r]; 2555 idx[r] = cmap[offdiagIdx[r]]; 2556 } 2557 } 2558 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2559 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2560 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2561 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2562 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2563 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2564 PetscFunctionReturn(0); 2565 } 2566 2567 #undef __FUNCT__ 2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2570 { 2571 PetscErrorCode ierr; 2572 Mat *dummy; 2573 2574 PetscFunctionBegin; 2575 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2576 *newmat = *dummy; 2577 ierr = PetscFree(dummy);CHKERRQ(ierr); 2578 PetscFunctionReturn(0); 2579 } 2580 2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2582 /* -------------------------------------------------------------------*/ 2583 static struct _MatOps MatOps_Values = 
{MatSetValues_MPIAIJ, 2584 MatGetRow_MPIAIJ, 2585 MatRestoreRow_MPIAIJ, 2586 MatMult_MPIAIJ, 2587 /* 4*/ MatMultAdd_MPIAIJ, 2588 MatMultTranspose_MPIAIJ, 2589 MatMultTransposeAdd_MPIAIJ, 2590 #ifdef PETSC_HAVE_PBGL 2591 MatSolve_MPIAIJ, 2592 #else 2593 0, 2594 #endif 2595 0, 2596 0, 2597 /*10*/ 0, 2598 0, 2599 0, 2600 MatSOR_MPIAIJ, 2601 MatTranspose_MPIAIJ, 2602 /*15*/ MatGetInfo_MPIAIJ, 2603 MatEqual_MPIAIJ, 2604 MatGetDiagonal_MPIAIJ, 2605 MatDiagonalScale_MPIAIJ, 2606 MatNorm_MPIAIJ, 2607 /*20*/ MatAssemblyBegin_MPIAIJ, 2608 MatAssemblyEnd_MPIAIJ, 2609 MatSetOption_MPIAIJ, 2610 MatZeroEntries_MPIAIJ, 2611 /*24*/ MatZeroRows_MPIAIJ, 2612 0, 2613 #ifdef PETSC_HAVE_PBGL 2614 0, 2615 #else 2616 0, 2617 #endif 2618 0, 2619 0, 2620 /*29*/ MatSetUpPreallocation_MPIAIJ, 2621 #ifdef PETSC_HAVE_PBGL 2622 0, 2623 #else 2624 0, 2625 #endif 2626 0, 2627 0, 2628 0, 2629 /*34*/ MatDuplicate_MPIAIJ, 2630 0, 2631 0, 2632 0, 2633 0, 2634 /*39*/ MatAXPY_MPIAIJ, 2635 MatGetSubMatrices_MPIAIJ, 2636 MatIncreaseOverlap_MPIAIJ, 2637 MatGetValues_MPIAIJ, 2638 MatCopy_MPIAIJ, 2639 /*44*/ MatGetRowMax_MPIAIJ, 2640 MatScale_MPIAIJ, 2641 0, 2642 0, 2643 0, 2644 /*49*/ MatSetBlockSize_MPIAIJ, 2645 0, 2646 0, 2647 0, 2648 0, 2649 /*54*/ MatFDColoringCreate_MPIAIJ, 2650 0, 2651 MatSetUnfactored_MPIAIJ, 2652 MatPermute_MPIAIJ, 2653 0, 2654 /*59*/ MatGetSubMatrix_MPIAIJ, 2655 MatDestroy_MPIAIJ, 2656 MatView_MPIAIJ, 2657 0, 2658 0, 2659 /*64*/ 0, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2665 MatGetRowMinAbs_MPIAIJ, 2666 0, 2667 MatSetColoring_MPIAIJ, 2668 #if defined(PETSC_HAVE_ADIC) 2669 MatSetValuesAdic_MPIAIJ, 2670 #else 2671 0, 2672 #endif 2673 MatSetValuesAdifor_MPIAIJ, 2674 /*75*/ MatFDColoringApply_AIJ, 2675 0, 2676 0, 2677 0, 2678 0, 2679 /*80*/ 0, 2680 0, 2681 0, 2682 /*83*/ MatLoad_MPIAIJ, 2683 0, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2689 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2690 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2691 
MatPtAP_Basic, 2692 MatPtAPSymbolic_MPIAIJ, 2693 /*94*/ MatPtAPNumeric_MPIAIJ, 2694 0, 2695 0, 2696 0, 2697 0, 2698 /*99*/ 0, 2699 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2700 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2701 MatConjugate_MPIAIJ, 2702 0, 2703 /*104*/MatSetValuesRow_MPIAIJ, 2704 MatRealPart_MPIAIJ, 2705 MatImaginaryPart_MPIAIJ, 2706 0, 2707 0, 2708 /*109*/0, 2709 MatGetRedundantMatrix_MPIAIJ, 2710 MatGetRowMin_MPIAIJ, 2711 0, 2712 0, 2713 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2714 0, 2715 0, 2716 0, 2717 0, 2718 /*119*/0, 2719 0, 2720 0, 2721 0 2722 }; 2723 2724 /* ----------------------------------------------------------------------------------------*/ 2725 2726 EXTERN_C_BEGIN 2727 #undef __FUNCT__ 2728 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2729 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2730 { 2731 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2732 PetscErrorCode ierr; 2733 2734 PetscFunctionBegin; 2735 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2736 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2737 PetscFunctionReturn(0); 2738 } 2739 EXTERN_C_END 2740 2741 EXTERN_C_BEGIN 2742 #undef __FUNCT__ 2743 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2744 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2745 { 2746 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2747 PetscErrorCode ierr; 2748 2749 PetscFunctionBegin; 2750 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2751 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2752 PetscFunctionReturn(0); 2753 } 2754 EXTERN_C_END 2755 2756 EXTERN_C_BEGIN 2757 #undef __FUNCT__ 2758 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2759 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2760 { 2761 Mat_MPIAIJ *b; 2762 PetscErrorCode ierr; 2763 PetscInt i; 2764 2765 PetscFunctionBegin; 2766 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2767 if (o_nz == PETSC_DEFAULT || o_nz == 
PETSC_DECIDE) o_nz = 2; 2768 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2769 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2770 2771 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2772 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2773 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2774 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2775 if (d_nnz) { 2776 for (i=0; i<B->rmap->n; i++) { 2777 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2778 } 2779 } 2780 if (o_nnz) { 2781 for (i=0; i<B->rmap->n; i++) { 2782 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2783 } 2784 } 2785 b = (Mat_MPIAIJ*)B->data; 2786 2787 if (!B->preallocated) { 2788 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 2789 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2790 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2791 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2792 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2793 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2794 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2795 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2796 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2797 } 2798 2799 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2800 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2801 B->preallocated = PETSC_TRUE; 2802 PetscFunctionReturn(0); 2803 } 2804 EXTERN_C_END 2805 2806 #undef __FUNCT__ 2807 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2808 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2809 { 2810 Mat mat; 2811 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2812 PetscErrorCode ierr; 2813 2814 PetscFunctionBegin; 2815 *newmat = 0; 2816 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2817 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2818 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2819 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2820 a = (Mat_MPIAIJ*)mat->data; 2821 2822 mat->factortype = matin->factortype; 2823 mat->rmap->bs = matin->rmap->bs; 2824 mat->assembled = PETSC_TRUE; 2825 mat->insertmode = NOT_SET_VALUES; 2826 mat->preallocated = PETSC_TRUE; 2827 2828 a->size = oldmat->size; 2829 a->rank = oldmat->rank; 2830 a->donotstash = oldmat->donotstash; 2831 a->roworiented = oldmat->roworiented; 2832 a->rowindices = 0; 2833 a->rowvalues = 0; 2834 a->getrowactive = PETSC_FALSE; 2835 2836 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2837 ierr = 
PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2838 2839 if (oldmat->colmap) { 2840 #if defined (PETSC_USE_CTABLE) 2841 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2842 #else 2843 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2844 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2845 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2846 #endif 2847 } else a->colmap = 0; 2848 if (oldmat->garray) { 2849 PetscInt len; 2850 len = oldmat->B->cmap->n; 2851 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2852 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2853 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2854 } else a->garray = 0; 2855 2856 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2857 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2858 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2859 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2860 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2861 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2862 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2863 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2864 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2865 *newmat = mat; 2866 PetscFunctionReturn(0); 2867 } 2868 2869 #undef __FUNCT__ 2870 #define __FUNCT__ "MatLoad_MPIAIJ" 2871 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2872 { 2873 PetscScalar *vals,*svals; 2874 MPI_Comm comm = ((PetscObject)viewer)->comm; 2875 MPI_Status status; 2876 PetscErrorCode ierr; 2877 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2878 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 2879 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2880 
PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2881 PetscInt cend,cstart,n,*rowners,sizesset=1; 2882 int fd; 2883 2884 PetscFunctionBegin; 2885 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2886 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2887 if (!rank) { 2888 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2889 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2890 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2891 } 2892 2893 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 2894 2895 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2896 M = header[1]; N = header[2]; 2897 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2898 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 2899 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 2900 2901 /* If global sizes are set, check if they are consistent with that given in the file */ 2902 if (sizesset) { 2903 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 2904 } 2905 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2906 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2907 2908 /* determine ownership of all rows */ 2909 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 2910 else m = newMat->rmap->n; /* Set by user */ 2911 2912 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2913 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2914 2915 /* First process needs enough room for process with most rows */ 2916 if 
(!rank) { 2917 mmax = rowners[1]; 2918 for (i=2; i<size; i++) { 2919 mmax = PetscMax(mmax,rowners[i]); 2920 } 2921 } else mmax = m; 2922 2923 rowners[0] = 0; 2924 for (i=2; i<=size; i++) { 2925 rowners[i] += rowners[i-1]; 2926 } 2927 rstart = rowners[rank]; 2928 rend = rowners[rank+1]; 2929 2930 /* distribute row lengths to all processors */ 2931 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2932 if (!rank) { 2933 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2934 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2935 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2936 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2937 for (j=0; j<m; j++) { 2938 procsnz[0] += ourlens[j]; 2939 } 2940 for (i=1; i<size; i++) { 2941 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2942 /* calculate the number of nonzeros on each processor */ 2943 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2944 procsnz[i] += rowlengths[j]; 2945 } 2946 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2947 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2948 } 2949 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2950 } else { 2951 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2952 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2953 } 2954 2955 if (!rank) { 2956 /* determine max buffer needed and allocate it */ 2957 maxnz = 0; 2958 for (i=0; i<size; i++) { 2959 maxnz = PetscMax(maxnz,procsnz[i]); 2960 } 2961 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2962 2963 /* read in my part of the matrix column indices */ 2964 nz = procsnz[0]; 2965 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2966 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2967 2968 /* read in every one elses and ship off */ 2969 for (i=1; i<size; i++) { 2970 nz = procsnz[i]; 2971 ierr = 
PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2972 mpicnt = PetscMPIIntCast(nz); 2973 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2974 } 2975 ierr = PetscFree(cols);CHKERRQ(ierr); 2976 } else { 2977 /* determine buffer space needed for message */ 2978 nz = 0; 2979 for (i=0; i<m; i++) { 2980 nz += ourlens[i]; 2981 } 2982 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2983 2984 /* receive message of column indices*/ 2985 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2986 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2987 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2988 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 2989 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 2990 else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 2991 } 2992 2993 /* determine column ownership if matrix is not square */ 2994 if (N != M) { 2995 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2996 else n = newMat->cmap->n; 2997 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2998 cstart = cend - n; 2999 } else { 3000 cstart = rstart; 3001 cend = rend; 3002 n = cend - cstart; 3003 } 3004 3005 /* loop over local rows, determining number of off diagonal entries */ 3006 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3007 jj = 0; 3008 for (i=0; i<m; i++) { 3009 for (j=0; j<ourlens[i]; j++) { 3010 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3011 jj++; 3012 } 3013 } 3014 3015 for (i=0; i<m; i++) { 3016 ourlens[i] -= offlens[i]; 3017 } 3018 if (!sizesset) { 3019 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3020 } 3021 ierr = 
MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3022 3023 for (i=0; i<m; i++) { 3024 ourlens[i] += offlens[i]; 3025 } 3026 3027 if (!rank) { 3028 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3029 3030 /* read in my part of the matrix numerical values */ 3031 nz = procsnz[0]; 3032 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3033 3034 /* insert into matrix */ 3035 jj = rstart; 3036 smycols = mycols; 3037 svals = vals; 3038 for (i=0; i<m; i++) { 3039 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3040 smycols += ourlens[i]; 3041 svals += ourlens[i]; 3042 jj++; 3043 } 3044 3045 /* read in other processors and ship out */ 3046 for (i=1; i<size; i++) { 3047 nz = procsnz[i]; 3048 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3049 mpicnt = PetscMPIIntCast(nz); 3050 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3051 } 3052 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3053 } else { 3054 /* receive numeric values */ 3055 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3056 3057 /* receive message of values*/ 3058 mpicnt = PetscMPIIntCast(nz); 3059 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm,&status);CHKERRQ(ierr); 3060 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3061 if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt); 3062 else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt); 3063 else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz); 3064 3065 /* insert into matrix */ 3066 jj = rstart; 3067 smycols = mycols; 3068 svals = vals; 3069 for (i=0; i<m; i++) { 3070 ierr = 
MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3071 smycols += ourlens[i]; 3072 svals += ourlens[i]; 3073 jj++; 3074 } 3075 } 3076 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3077 ierr = PetscFree(vals);CHKERRQ(ierr); 3078 ierr = PetscFree(mycols);CHKERRQ(ierr); 3079 ierr = PetscFree(rowners);CHKERRQ(ierr); 3080 3081 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3082 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3083 PetscFunctionReturn(0); 3084 } 3085 3086 #undef __FUNCT__ 3087 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3088 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3089 { 3090 PetscErrorCode ierr; 3091 IS iscol_local; 3092 PetscInt csize; 3093 3094 PetscFunctionBegin; 3095 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3096 if (call == MAT_REUSE_MATRIX) { 3097 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3098 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3099 } else { 3100 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3101 } 3102 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3103 if (call == MAT_INITIAL_MATRIX) { 3104 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3105 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3106 } 3107 PetscFunctionReturn(0); 3108 } 3109 3110 #undef __FUNCT__ 3111 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3112 /* 3113 Not great since it makes two copies of the submatrix, first an SeqAIJ 3114 in local and then by concatenating the local matrices the end result. 3115 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3116 3117 Note: This requires a sequential iscol with all indices. 
3118 */ 3119 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3120 { 3121 PetscErrorCode ierr; 3122 PetscMPIInt rank,size; 3123 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3124 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3125 Mat *local,M,Mreuse; 3126 MatScalar *vwork,*aa; 3127 MPI_Comm comm = ((PetscObject)mat)->comm; 3128 Mat_SeqAIJ *aij; 3129 3130 3131 PetscFunctionBegin; 3132 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3133 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3134 3135 if (call == MAT_REUSE_MATRIX) { 3136 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3137 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3138 local = &Mreuse; 3139 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3140 } else { 3141 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3142 Mreuse = *local; 3143 ierr = PetscFree(local);CHKERRQ(ierr); 3144 } 3145 3146 /* 3147 m - number of local rows 3148 n - number of columns (same on all processors) 3149 rstart - first row in new global matrix generated 3150 */ 3151 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3152 if (call == MAT_INITIAL_MATRIX) { 3153 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3154 ii = aij->i; 3155 jj = aij->j; 3156 3157 /* 3158 Determine the number of non-zeros in the diagonal and off-diagonal 3159 portions of the matrix in order to do correct preallocation 3160 */ 3161 3162 /* first get start and end of "diagonal" columns */ 3163 if (csize == PETSC_DECIDE) { 3164 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3165 if (mglobal == n) { /* square matrix */ 3166 nlocal = m; 3167 } else { 3168 nlocal = n/size + ((n % size) > rank); 3169 } 3170 } else { 3171 nlocal = csize; 3172 } 3173 ierr = 
MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3174 rstart = rend - nlocal; 3175 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3176 3177 /* next, compute all the lengths */ 3178 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3179 olens = dlens + m; 3180 for (i=0; i<m; i++) { 3181 jend = ii[i+1] - ii[i]; 3182 olen = 0; 3183 dlen = 0; 3184 for (j=0; j<jend; j++) { 3185 if (*jj < rstart || *jj >= rend) olen++; 3186 else dlen++; 3187 jj++; 3188 } 3189 olens[i] = olen; 3190 dlens[i] = dlen; 3191 } 3192 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3193 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3194 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3195 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3196 ierr = PetscFree(dlens);CHKERRQ(ierr); 3197 } else { 3198 PetscInt ml,nl; 3199 3200 M = *newmat; 3201 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3202 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3203 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3204 /* 3205 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3206 rather than the slower MatSetValues(). 
3207 */ 3208 M->was_assembled = PETSC_TRUE; 3209 M->assembled = PETSC_FALSE; 3210 } 3211 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3212 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3213 ii = aij->i; 3214 jj = aij->j; 3215 aa = aij->a; 3216 for (i=0; i<m; i++) { 3217 row = rstart + i; 3218 nz = ii[i+1] - ii[i]; 3219 cwork = jj; jj += nz; 3220 vwork = aa; aa += nz; 3221 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3222 } 3223 3224 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3225 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3226 *newmat = M; 3227 3228 /* save submatrix used in processor for next request */ 3229 if (call == MAT_INITIAL_MATRIX) { 3230 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3231 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3232 } 3233 3234 PetscFunctionReturn(0); 3235 } 3236 3237 EXTERN_C_BEGIN 3238 #undef __FUNCT__ 3239 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3240 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3241 { 3242 PetscInt m,cstart, cend,j,nnz,i,d; 3243 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3244 const PetscInt *JJ; 3245 PetscScalar *values; 3246 PetscErrorCode ierr; 3247 3248 PetscFunctionBegin; 3249 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3250 3251 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3252 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3253 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3254 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3255 m = B->rmap->n; 3256 cstart = B->cmap->rstart; 3257 cend = B->cmap->rend; 3258 rstart = B->rmap->rstart; 3259 3260 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3261 3262 #if defined(PETSC_USE_DEBUGGING) 3263 for (i=0; i<m; i++) { 3264 
nnz = Ii[i+1]- Ii[i]; 3265 JJ = J + Ii[i]; 3266 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3267 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3268 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3269 } 3270 #endif 3271 3272 for (i=0; i<m; i++) { 3273 nnz = Ii[i+1]- Ii[i]; 3274 JJ = J + Ii[i]; 3275 nnz_max = PetscMax(nnz_max,nnz); 3276 d = 0; 3277 for (j=0; j<nnz; j++) { 3278 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3279 } 3280 d_nnz[i] = d; 3281 o_nnz[i] = nnz - d; 3282 } 3283 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3284 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3285 3286 if (v) values = (PetscScalar*)v; 3287 else { 3288 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3289 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3290 } 3291 3292 for (i=0; i<m; i++) { 3293 ii = i + rstart; 3294 nnz = Ii[i+1]- Ii[i]; 3295 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3296 } 3297 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3298 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3299 3300 if (!v) { 3301 ierr = PetscFree(values);CHKERRQ(ierr); 3302 } 3303 PetscFunctionReturn(0); 3304 } 3305 EXTERN_C_END 3306 3307 #undef __FUNCT__ 3308 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3309 /*@ 3310 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3311 (the default parallel PETSc format). 3312 3313 Collective on MPI_Comm 3314 3315 Input Parameters: 3316 + B - the matrix 3317 . i - the indices into j for the start of each local row (starts with zero) 3318 . 
j - the column indices for each local row (starts with zero) 3319 - v - optional values in the matrix 3320 3321 Level: developer 3322 3323 Notes: 3324 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3325 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3326 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3327 3328 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3329 3330 The format which is used for the sparse matrix input, is equivalent to a 3331 row-major ordering.. i.e for the following matrix, the input data expected is 3332 as shown: 3333 3334 1 0 0 3335 2 0 3 P0 3336 ------- 3337 4 5 6 P1 3338 3339 Process0 [P0]: rows_owned=[0,1] 3340 i = {0,1,3} [size = nrow+1 = 2+1] 3341 j = {0,0,2} [size = nz = 6] 3342 v = {1,2,3} [size = nz = 6] 3343 3344 Process1 [P1]: rows_owned=[2] 3345 i = {0,3} [size = nrow+1 = 1+1] 3346 j = {0,1,2} [size = nz = 6] 3347 v = {4,5,6} [size = nz = 6] 3348 3349 .keywords: matrix, aij, compressed row, sparse, parallel 3350 3351 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3352 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3353 @*/ 3354 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3355 { 3356 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3357 3358 PetscFunctionBegin; 3359 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3360 if (f) { 3361 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3362 } 3363 PetscFunctionReturn(0); 3364 } 3365 3366 #undef __FUNCT__ 3367 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3368 /*@C 3369 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.  See the users manual for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.
   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. E.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
3486 3487 Level: intermediate 3488 3489 .keywords: matrix, aij, compressed row, sparse, parallel 3490 3491 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3492 MPIAIJ, MatGetInfo() 3493 @*/ 3494 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3495 { 3496 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3497 3498 PetscFunctionBegin; 3499 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3500 if (f) { 3501 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3502 } 3503 PetscFunctionReturn(0); 3504 } 3505 3506 #undef __FUNCT__ 3507 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3508 /*@ 3509 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3510 CSR format the local rows. 3511 3512 Collective on MPI_Comm 3513 3514 Input Parameters: 3515 + comm - MPI communicator 3516 . m - number of local rows (Cannot be PETSC_DECIDE) 3517 . n - This value should be the same as the local size used in creating the 3518 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3519 calculated if N is given) For square matrices n is almost always m. 3520 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3521 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3522 . i - row indices 3523 . j - column indices 3524 - a - matrix values 3525 3526 Output Parameter: 3527 . mat - the matrix 3528 3529 Level: intermediate 3530 3531 Notes: 3532 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3533 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3534 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
3535 3536 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3537 3538 The format which is used for the sparse matrix input, is equivalent to a 3539 row-major ordering.. i.e for the following matrix, the input data expected is 3540 as shown: 3541 3542 1 0 0 3543 2 0 3 P0 3544 ------- 3545 4 5 6 P1 3546 3547 Process0 [P0]: rows_owned=[0,1] 3548 i = {0,1,3} [size = nrow+1 = 2+1] 3549 j = {0,0,2} [size = nz = 6] 3550 v = {1,2,3} [size = nz = 6] 3551 3552 Process1 [P1]: rows_owned=[2] 3553 i = {0,3} [size = nrow+1 = 1+1] 3554 j = {0,1,2} [size = nz = 6] 3555 v = {4,5,6} [size = nz = 6] 3556 3557 .keywords: matrix, aij, compressed row, sparse, parallel 3558 3559 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3560 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3561 @*/ 3562 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3563 { 3564 PetscErrorCode ierr; 3565 3566 PetscFunctionBegin; 3567 if (i[0]) { 3568 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3569 } 3570 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3571 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3572 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3573 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3574 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3575 PetscFunctionReturn(0); 3576 } 3577 3578 #undef __FUNCT__ 3579 #define __FUNCT__ "MatCreateMPIAIJ" 3580 /*@C 3581 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3582 (the default parallel PETSc format). 
For good matrix assembly performance 3583 the user should preallocate the matrix storage by setting the parameters 3584 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3585 performance can be increased by more than a factor of 50. 3586 3587 Collective on MPI_Comm 3588 3589 Input Parameters: 3590 + comm - MPI communicator 3591 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3592 This value should be the same as the local size used in creating the 3593 y vector for the matrix-vector product y = Ax. 3594 . n - This value should be the same as the local size used in creating the 3595 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3596 calculated if N is given) For square matrices n is almost always m. 3597 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3598 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3599 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3600 (same value is used for all local rows) 3601 . d_nnz - array containing the number of nonzeros in the various rows of the 3602 DIAGONAL portion of the local submatrix (possibly different for each row) 3603 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3604 The size of this array is equal to the number of local rows, i.e 'm'. 3605 You must leave room for the diagonal entry even if it is zero. 3606 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3607 submatrix (same value is used for all local rows). 3608 - o_nnz - array containing the number of nonzeros in the various rows of the 3609 OFF-DIAGONAL portion of the local submatrix (possibly different for 3610 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3611 structure. The size of this array is equal to the number 3612 of local rows, i.e 'm'. 3613 3614 Output Parameter: 3615 . 
A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitute the OFF-DIAGONAL portion. The example below better
   illustrates this concept.
3652 3653 For a square global matrix we define each processor's diagonal portion 3654 to be its local rows and the corresponding columns (a square submatrix); 3655 each processor's off-diagonal portion encompasses the remainder of the 3656 local matrix (a rectangular submatrix). 3657 3658 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3659 3660 When calling this routine with a single process communicator, a matrix of 3661 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3662 type of communicator, use the construction mechanism: 3663 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3664 3665 By default, this format uses inodes (identical nodes) when possible. 3666 We search for consecutive rows with the same nonzero structure, thereby 3667 reusing matrix information to achieve increased efficiency. 3668 3669 Options Database Keys: 3670 + -mat_no_inode - Do not use inodes 3671 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3672 - -mat_aij_oneindex - Internally use indexing starting at 1 3673 rather than 0. Note that when calling MatSetValues(), 3674 the user still MUST index entries starting at 0! 3675 3676 3677 Example usage: 3678 3679 Consider the following 8x8 matrix with 34 non-zero values, that is 3680 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3681 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. E.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store 3733 34 values. 3734 3735 When d_nnz, o_nnz parameters are specified, the storage is specified 3736 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3737 In the above case the values for d_nnz,o_nnz are: 3738 .vb 3739 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3740 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3741 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3742 .ve 3743 Here the space allocated is sum of all the above values i.e 34, and 3744 hence pre-allocation is perfect. 3745 3746 Level: intermediate 3747 3748 .keywords: matrix, aij, compressed row, sparse, parallel 3749 3750 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3751 MPIAIJ, MatCreateMPIAIJWithArrays() 3752 @*/ 3753 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3754 { 3755 PetscErrorCode ierr; 3756 PetscMPIInt size; 3757 3758 PetscFunctionBegin; 3759 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3760 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3761 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3762 if (size > 1) { 3763 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3764 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3765 } else { 3766 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3767 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3768 } 3769 PetscFunctionReturn(0); 3770 } 3771 3772 #undef __FUNCT__ 3773 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3774 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3775 { 3776 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3777 3778 PetscFunctionBegin; 3779 *Ad = a->A; 3780 *Ao = a->B; 3781 *colmap = a->garray; 3782 PetscFunctionReturn(0); 3783 } 3784 3785 #undef __FUNCT__ 3786 #define __FUNCT__ 
"MatSetColoring_MPIAIJ" 3787 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3788 { 3789 PetscErrorCode ierr; 3790 PetscInt i; 3791 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3792 3793 PetscFunctionBegin; 3794 if (coloring->ctype == IS_COLORING_GLOBAL) { 3795 ISColoringValue *allcolors,*colors; 3796 ISColoring ocoloring; 3797 3798 /* set coloring for diagonal portion */ 3799 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3800 3801 /* set coloring for off-diagonal portion */ 3802 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3803 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3804 for (i=0; i<a->B->cmap->n; i++) { 3805 colors[i] = allcolors[a->garray[i]]; 3806 } 3807 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3808 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3809 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3810 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3811 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3812 ISColoringValue *colors; 3813 PetscInt *larray; 3814 ISColoring ocoloring; 3815 3816 /* set coloring for diagonal portion */ 3817 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3818 for (i=0; i<a->A->cmap->n; i++) { 3819 larray[i] = i + A->cmap->rstart; 3820 } 3821 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3822 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3823 for (i=0; i<a->A->cmap->n; i++) { 3824 colors[i] = coloring->colors[larray[i]]; 3825 } 3826 ierr = PetscFree(larray);CHKERRQ(ierr); 3827 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3828 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3829 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 
3830 3831 /* set coloring for off-diagonal portion */ 3832 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3833 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3834 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3835 for (i=0; i<a->B->cmap->n; i++) { 3836 colors[i] = coloring->colors[larray[i]]; 3837 } 3838 ierr = PetscFree(larray);CHKERRQ(ierr); 3839 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3840 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3841 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3842 } else { 3843 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3844 } 3845 3846 PetscFunctionReturn(0); 3847 } 3848 3849 #if defined(PETSC_HAVE_ADIC) 3850 #undef __FUNCT__ 3851 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3852 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3853 { 3854 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3855 PetscErrorCode ierr; 3856 3857 PetscFunctionBegin; 3858 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3859 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3860 PetscFunctionReturn(0); 3861 } 3862 #endif 3863 3864 #undef __FUNCT__ 3865 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3866 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3867 { 3868 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3869 PetscErrorCode ierr; 3870 3871 PetscFunctionBegin; 3872 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3873 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3874 PetscFunctionReturn(0); 3875 } 3876 3877 #undef __FUNCT__ 3878 #define __FUNCT__ "MatMerge" 3879 /*@ 3880 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3881 matrices from each processor 3882 3883 Collective on MPI_Comm 3884 
3885 Input Parameters: 3886 + comm - the communicators the parallel matrix will live on 3887 . inmat - the input sequential matrices 3888 . n - number of local columns (or PETSC_DECIDE) 3889 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3890 3891 Output Parameter: 3892 . outmat - the parallel matrix generated 3893 3894 Level: advanced 3895 3896 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3897 3898 @*/ 3899 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3900 { 3901 PetscErrorCode ierr; 3902 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3903 PetscInt *indx; 3904 PetscScalar *values; 3905 3906 PetscFunctionBegin; 3907 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3908 if (scall == MAT_INITIAL_MATRIX){ 3909 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3910 if (n == PETSC_DECIDE){ 3911 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3912 } 3913 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3914 rstart -= m; 3915 3916 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3917 for (i=0;i<m;i++) { 3918 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3919 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3920 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3921 } 3922 /* This routine will ONLY return MPIAIJ type matrix */ 3923 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3924 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3925 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3926 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3927 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3928 3929 } else if (scall == MAT_REUSE_MATRIX){ 3930 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3931 } else { 3932 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid 
MatReuse %d",(int)scall); 3933 } 3934 3935 for (i=0;i<m;i++) { 3936 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3937 Ii = i + rstart; 3938 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3939 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3940 } 3941 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3942 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3943 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3944 3945 PetscFunctionReturn(0); 3946 } 3947 3948 #undef __FUNCT__ 3949 #define __FUNCT__ "MatFileSplit" 3950 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3951 { 3952 PetscErrorCode ierr; 3953 PetscMPIInt rank; 3954 PetscInt m,N,i,rstart,nnz; 3955 size_t len; 3956 const PetscInt *indx; 3957 PetscViewer out; 3958 char *name; 3959 Mat B; 3960 const PetscScalar *values; 3961 3962 PetscFunctionBegin; 3963 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3964 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3965 /* Should this be the type of the diagonal block of A? 
*/ 3966 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3967 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3968 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3969 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3970 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3971 for (i=0;i<m;i++) { 3972 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3973 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3974 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3975 } 3976 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3977 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3978 3979 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3980 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3981 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3982 sprintf(name,"%s.%d",outfile,rank); 3983 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3984 ierr = PetscFree(name); 3985 ierr = MatView(B,out);CHKERRQ(ierr); 3986 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3987 ierr = MatDestroy(B);CHKERRQ(ierr); 3988 PetscFunctionReturn(0); 3989 } 3990 3991 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3992 #undef __FUNCT__ 3993 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3994 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3995 { 3996 PetscErrorCode ierr; 3997 Mat_Merge_SeqsToMPI *merge; 3998 PetscContainer container; 3999 4000 PetscFunctionBegin; 4001 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4002 if (container) { 4003 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4004 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4005 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4006 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4007 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4008 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4009 ierr = 
PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4010 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4011 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4012 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4013 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4014 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4015 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4016 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4017 4018 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4019 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4020 } 4021 ierr = PetscFree(merge);CHKERRQ(ierr); 4022 4023 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4024 PetscFunctionReturn(0); 4025 } 4026 4027 #include "../src/mat/utils/freespace.h" 4028 #include "petscbt.h" 4029 4030 #undef __FUNCT__ 4031 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4032 /*@C 4033 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4034 matrices from each processor 4035 4036 Collective on MPI_Comm 4037 4038 Input Parameters: 4039 + comm - the communicators the parallel matrix will live on 4040 . seqmat - the input sequential matrices 4041 . m - number of local rows (or PETSC_DECIDE) 4042 . n - number of local columns (or PETSC_DECIDE) 4043 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4044 4045 Output Parameter: 4046 . mpimat - the parallel matrix generated 4047 4048 Level: advanced 4049 4050 Notes: 4051 The dimensions of the sequential matrix in each processor MUST be the same. 4052 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4053 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
4054 @*/ 4055 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4056 { 4057 PetscErrorCode ierr; 4058 MPI_Comm comm=((PetscObject)mpimat)->comm; 4059 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4060 PetscMPIInt size,rank,taga,*len_s; 4061 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4062 PetscInt proc,m; 4063 PetscInt **buf_ri,**buf_rj; 4064 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4065 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4066 MPI_Request *s_waits,*r_waits; 4067 MPI_Status *status; 4068 MatScalar *aa=a->a; 4069 MatScalar **abuf_r,*ba_i; 4070 Mat_Merge_SeqsToMPI *merge; 4071 PetscContainer container; 4072 4073 PetscFunctionBegin; 4074 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4075 4076 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4077 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4078 4079 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4080 if (container) { 4081 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4082 } 4083 bi = merge->bi; 4084 bj = merge->bj; 4085 buf_ri = merge->buf_ri; 4086 buf_rj = merge->buf_rj; 4087 4088 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4089 owners = merge->rowmap->range; 4090 len_s = merge->len_s; 4091 4092 /* send and recv matrix values */ 4093 /*-----------------------------*/ 4094 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4095 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4096 4097 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4098 for (proc=0,k=0; proc<size; proc++){ 4099 if (!len_s[proc]) continue; 4100 i = owners[proc]; 4101 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4102 k++; 4103 } 4104 4105 if (merge->nrecv) {ierr = 
MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4106 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4107 ierr = PetscFree(status);CHKERRQ(ierr); 4108 4109 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4110 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4111 4112 /* insert mat values of mpimat */ 4113 /*----------------------------*/ 4114 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4115 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4116 4117 for (k=0; k<merge->nrecv; k++){ 4118 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4119 nrows = *(buf_ri_k[k]); 4120 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4121 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4122 } 4123 4124 /* set values of ba */ 4125 m = merge->rowmap->n; 4126 for (i=0; i<m; i++) { 4127 arow = owners[rank] + i; 4128 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4129 bnzi = bi[i+1] - bi[i]; 4130 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4131 4132 /* add local non-zero vals of this proc's seqmat into ba */ 4133 anzi = ai[arow+1] - ai[arow]; 4134 aj = a->j + ai[arow]; 4135 aa = a->a + ai[arow]; 4136 nextaj = 0; 4137 for (j=0; nextaj<anzi; j++){ 4138 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4139 ba_i[j] += aa[nextaj++]; 4140 } 4141 } 4142 4143 /* add received vals into ba */ 4144 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4145 /* i-th row */ 4146 if (i == *nextrow[k]) { 4147 anzi = *(nextai[k]+1) - *nextai[k]; 4148 aj = buf_rj[k] + *(nextai[k]); 4149 aa = abuf_r[k] + *(nextai[k]); 4150 nextaj = 0; 4151 for (j=0; nextaj<anzi; j++){ 4152 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4153 ba_i[j] += aa[nextaj++]; 4154 } 4155 } 4156 nextrow[k]++; nextai[k]++; 4157 } 4158 } 4159 ierr = 
MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4160 } 4161 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4162 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4163 4164 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4165 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4166 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4167 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4168 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4169 PetscFunctionReturn(0); 4170 } 4171 4172 #undef __FUNCT__ 4173 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4174 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4175 { 4176 PetscErrorCode ierr; 4177 Mat B_mpi; 4178 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4179 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4180 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4181 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4182 PetscInt len,proc,*dnz,*onz; 4183 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4184 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4185 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4186 MPI_Status *status; 4187 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4188 PetscBT lnkbt; 4189 Mat_Merge_SeqsToMPI *merge; 4190 PetscContainer container; 4191 4192 PetscFunctionBegin; 4193 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4194 4195 /* make sure it is a PETSc comm */ 4196 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4198 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4199 4200 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4201 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4202 4203 /* determine row ownership */ 4204 /*---------------------------------------------------------*/ 
4205 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4206 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4207 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4208 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4209 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4210 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4211 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4212 4213 m = merge->rowmap->n; 4214 M = merge->rowmap->N; 4215 owners = merge->rowmap->range; 4216 4217 /* determine the number of messages to send, their lengths */ 4218 /*---------------------------------------------------------*/ 4219 len_s = merge->len_s; 4220 4221 len = 0; /* length of buf_si[] */ 4222 merge->nsend = 0; 4223 for (proc=0; proc<size; proc++){ 4224 len_si[proc] = 0; 4225 if (proc == rank){ 4226 len_s[proc] = 0; 4227 } else { 4228 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4229 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4230 } 4231 if (len_s[proc]) { 4232 merge->nsend++; 4233 nrows = 0; 4234 for (i=owners[proc]; i<owners[proc+1]; i++){ 4235 if (ai[i+1] > ai[i]) nrows++; 4236 } 4237 len_si[proc] = 2*(nrows+1); 4238 len += len_si[proc]; 4239 } 4240 } 4241 4242 /* determine the number and length of messages to receive for ij-structure */ 4243 /*-------------------------------------------------------------------------*/ 4244 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4245 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4246 4247 /* post the Irecv of j-structure */ 4248 /*-------------------------------*/ 4249 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4250 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4251 4252 /* post the Isend of 
j-structure */ 4253 /*--------------------------------*/ 4254 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4255 4256 for (proc=0, k=0; proc<size; proc++){ 4257 if (!len_s[proc]) continue; 4258 i = owners[proc]; 4259 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4260 k++; 4261 } 4262 4263 /* receives and sends of j-structure are complete */ 4264 /*------------------------------------------------*/ 4265 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4266 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4267 4268 /* send and recv i-structure */ 4269 /*---------------------------*/ 4270 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4271 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4272 4273 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4274 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4275 for (proc=0,k=0; proc<size; proc++){ 4276 if (!len_s[proc]) continue; 4277 /* form outgoing message for i-structure: 4278 buf_si[0]: nrows to be sent 4279 [1:nrows]: row index (global) 4280 [nrows+1:2*nrows+1]: i-structure index 4281 */ 4282 /*-------------------------------------------*/ 4283 nrows = len_si[proc]/2 - 1; 4284 buf_si_i = buf_si + nrows+1; 4285 buf_si[0] = nrows; 4286 buf_si_i[0] = 0; 4287 nrows = 0; 4288 for (i=owners[proc]; i<owners[proc+1]; i++){ 4289 anzi = ai[i+1] - ai[i]; 4290 if (anzi) { 4291 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4292 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4293 nrows++; 4294 } 4295 } 4296 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4297 k++; 4298 buf_si += len_si[proc]; 4299 } 4300 4301 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4302 if 
(merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4303 4304 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4305 for (i=0; i<merge->nrecv; i++){ 4306 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4307 } 4308 4309 ierr = PetscFree(len_si);CHKERRQ(ierr); 4310 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4311 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4312 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4313 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4314 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4315 ierr = PetscFree(status);CHKERRQ(ierr); 4316 4317 /* compute a local seq matrix in each processor */ 4318 /*----------------------------------------------*/ 4319 /* allocate bi array and free space for accumulating nonzero column info */ 4320 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4321 bi[0] = 0; 4322 4323 /* create and initialize a linked list */ 4324 nlnk = N+1; 4325 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4326 4327 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4328 len = 0; 4329 len = ai[owners[rank+1]] - ai[owners[rank]]; 4330 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4331 current_space = free_space; 4332 4333 /* determine symbolic info for each local row */ 4334 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4335 4336 for (k=0; k<merge->nrecv; k++){ 4337 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4338 nrows = *buf_ri_k[k]; 4339 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4340 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4341 } 4342 4343 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4344 len = 0; 4345 for (i=0;i<m;i++) { 4346 
bnzi = 0; 4347 /* add local non-zero cols of this proc's seqmat into lnk */ 4348 arow = owners[rank] + i; 4349 anzi = ai[arow+1] - ai[arow]; 4350 aj = a->j + ai[arow]; 4351 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4352 bnzi += nlnk; 4353 /* add received col data into lnk */ 4354 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4355 if (i == *nextrow[k]) { /* i-th row */ 4356 anzi = *(nextai[k]+1) - *nextai[k]; 4357 aj = buf_rj[k] + *nextai[k]; 4358 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4359 bnzi += nlnk; 4360 nextrow[k]++; nextai[k]++; 4361 } 4362 } 4363 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4364 4365 /* if free space is not available, make more free space */ 4366 if (current_space->local_remaining<bnzi) { 4367 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4368 nspacedouble++; 4369 } 4370 /* copy data into free space, then initialize lnk */ 4371 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4372 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4373 4374 current_space->array += bnzi; 4375 current_space->local_used += bnzi; 4376 current_space->local_remaining -= bnzi; 4377 4378 bi[i+1] = bi[i] + bnzi; 4379 } 4380 4381 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4382 4383 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4384 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4385 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4386 4387 /* create symbolic parallel matrix B_mpi */ 4388 /*---------------------------------------*/ 4389 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4390 if (n==PETSC_DECIDE) { 4391 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4392 } else { 4393 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4394 } 4395 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4396 ierr = 
MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4397 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4398 4399 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4400 B_mpi->assembled = PETSC_FALSE; 4401 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4402 merge->bi = bi; 4403 merge->bj = bj; 4404 merge->buf_ri = buf_ri; 4405 merge->buf_rj = buf_rj; 4406 merge->coi = PETSC_NULL; 4407 merge->coj = PETSC_NULL; 4408 merge->owners_co = PETSC_NULL; 4409 4410 /* attach the supporting struct to B_mpi for reuse */ 4411 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4412 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4413 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4414 *mpimat = B_mpi; 4415 4416 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4417 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4418 PetscFunctionReturn(0); 4419 } 4420 4421 #undef __FUNCT__ 4422 #define __FUNCT__ "MatMerge_SeqsToMPI" 4423 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4424 { 4425 PetscErrorCode ierr; 4426 4427 PetscFunctionBegin; 4428 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4429 if (scall == MAT_INITIAL_MATRIX){ 4430 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4431 } 4432 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4433 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4434 PetscFunctionReturn(0); 4435 } 4436 4437 #undef __FUNCT__ 4438 #define __FUNCT__ "MatGetLocalMat" 4439 /*@ 4440 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4441 4442 Not Collective 4443 4444 Input Parameters: 4445 + A - the matrix 4446 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4447 4448 Output Parameter: 4449 . 
A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* Row i of the result has all of A's diagonal-block entries plus all of its
       off-diagonal-block entries, so its length is the sum of the two row lengths. */
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    /* NOTE: aj/aa and bj/ba are walked as running cursors across ALL rows below;
       global column order per row is: B-columns < cstart, then the diagonal block,
       then B-columns >= cstart (cmap[] maps B's compressed columns to global). */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A (columns left of the diagonal block) */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k] = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (columns right of the diagonal block) */
      for (j=jo; j<ncols_o; j++) {
        cj[k] = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    /* structure is unchanged: only copy the numerical values in place */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A (columns left of the diagonal block) */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A (columns right of the diagonal block) */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.
A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    /* default row set: this process's owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    /* default column set: columns of the diagonal block plus the nonzero
       off-diagonal columns (garray), kept in ascending global order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {            /* off-diagonal columns below start */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;      /* diagonal-block columns */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];    /* off-diagonal columns above */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    /* MatGetSubMatrices() expects a user-provided array of matrices on reuse */
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
.    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  /* rows of B must align with the columns of A for B's rows to be gathered by A's column map */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* build the row IS: A's nonzero global columns, in ascending order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;  /* first of B's local rows inside B_seq */
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);  /* all columns of B */
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    /* MatGetSubMatrices() expects a user-provided array of matrices on reuse */
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
    MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.
startsj_r - similar to startsj for receives 4690 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4691 4692 Output Parameter: 4693 + B_oth - the sequential matrix generated 4694 4695 Level: developer 4696 4697 @*/ 4698 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4699 { 4700 VecScatter_MPI_General *gen_to,*gen_from; 4701 PetscErrorCode ierr; 4702 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4703 Mat_SeqAIJ *b_oth; 4704 VecScatter ctx=a->Mvctx; 4705 MPI_Comm comm=((PetscObject)ctx)->comm; 4706 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4707 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4708 PetscScalar *rvalues,*svalues; 4709 MatScalar *b_otha,*bufa,*bufA; 4710 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4711 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4712 MPI_Status *sstatus,rstatus; 4713 PetscMPIInt jj; 4714 PetscInt *cols,sbs,rbs; 4715 PetscScalar *vals; 4716 4717 PetscFunctionBegin; 4718 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4719 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4720 } 4721 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4722 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4723 4724 gen_to = (VecScatter_MPI_General*)ctx->todata; 4725 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4726 rvalues = gen_from->values; /* holds the length of receiving row */ 4727 svalues = gen_to->values; /* holds the length of sending row */ 4728 nrecvs = gen_from->n; 4729 nsends = gen_to->n; 4730 4731 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4732 srow = gen_to->indices; /* local row index to be sent */ 
4733 sstarts = gen_to->starts; 4734 sprocs = gen_to->procs; 4735 sstatus = gen_to->sstatus; 4736 sbs = gen_to->bs; 4737 rstarts = gen_from->starts; 4738 rprocs = gen_from->procs; 4739 rbs = gen_from->bs; 4740 4741 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4742 if (scall == MAT_INITIAL_MATRIX){ 4743 /* i-array */ 4744 /*---------*/ 4745 /* post receives */ 4746 for (i=0; i<nrecvs; i++){ 4747 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4748 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4749 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4750 } 4751 4752 /* pack the outgoing message */ 4753 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4754 sstartsj[0] = 0; rstartsj[0] = 0; 4755 len = 0; /* total length of j or a array to be sent */ 4756 k = 0; 4757 for (i=0; i<nsends; i++){ 4758 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4759 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4760 for (j=0; j<nrows; j++) { 4761 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4762 for (l=0; l<sbs; l++){ 4763 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4764 rowlen[j*sbs+l] = ncols; 4765 len += ncols; 4766 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4767 } 4768 k++; 4769 } 4770 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4771 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4772 } 4773 /* recvs and sends of i-array are completed */ 4774 i = nrecvs; 4775 while (i--) { 4776 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4777 } 4778 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4779 4780 /* allocate buffers for sending j and a arrays */ 4781 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4782 ierr = 
PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4783 4784 /* create i-array of B_oth */ 4785 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4786 b_othi[0] = 0; 4787 len = 0; /* total length of j or a array to be received */ 4788 k = 0; 4789 for (i=0; i<nrecvs; i++){ 4790 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4791 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4792 for (j=0; j<nrows; j++) { 4793 b_othi[k+1] = b_othi[k] + rowlen[j]; 4794 len += rowlen[j]; k++; 4795 } 4796 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4797 } 4798 4799 /* allocate space for j and a arrrays of B_oth */ 4800 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4801 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4802 4803 /* j-array */ 4804 /*---------*/ 4805 /* post receives of j-array */ 4806 for (i=0; i<nrecvs; i++){ 4807 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4808 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4809 } 4810 4811 /* pack the outgoing message j-array */ 4812 k = 0; 4813 for (i=0; i<nsends; i++){ 4814 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4815 bufJ = bufj+sstartsj[i]; 4816 for (j=0; j<nrows; j++) { 4817 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4818 for (ll=0; ll<sbs; ll++){ 4819 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4820 for (l=0; l<ncols; l++){ 4821 *bufJ++ = cols[l]; 4822 } 4823 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4824 } 4825 } 4826 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4827 } 4828 4829 /* recvs and sends of j-array are completed */ 4830 i = nrecvs; 4831 while (i--) { 4832 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4833 } 4834 if 
(nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4835 } else if (scall == MAT_REUSE_MATRIX){ 4836 sstartsj = *startsj; 4837 rstartsj = *startsj_r; 4838 bufa = *bufa_ptr; 4839 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4840 b_otha = b_oth->a; 4841 } else { 4842 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4843 } 4844 4845 /* a-array */ 4846 /*---------*/ 4847 /* post receives of a-array */ 4848 for (i=0; i<nrecvs; i++){ 4849 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4850 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4851 } 4852 4853 /* pack the outgoing message a-array */ 4854 k = 0; 4855 for (i=0; i<nsends; i++){ 4856 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4857 bufA = bufa+sstartsj[i]; 4858 for (j=0; j<nrows; j++) { 4859 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4860 for (ll=0; ll<sbs; ll++){ 4861 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4862 for (l=0; l<ncols; l++){ 4863 *bufA++ = vals[l]; 4864 } 4865 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4866 } 4867 } 4868 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4869 } 4870 /* recvs and sends of a-array are completed */ 4871 i = nrecvs; 4872 while (i--) { 4873 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4874 } 4875 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4876 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4877 4878 if (scall == MAT_INITIAL_MATRIX){ 4879 /* put together the new matrix */ 4880 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4881 4882 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. 
*/ 4883 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4884 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4885 b_oth->free_a = PETSC_TRUE; 4886 b_oth->free_ij = PETSC_TRUE; 4887 b_oth->nonew = 0; 4888 4889 ierr = PetscFree(bufj);CHKERRQ(ierr); 4890 if (!startsj || !bufa_ptr){ 4891 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4892 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4893 } else { 4894 *startsj = sstartsj; 4895 *startsj_r = rstartsj; 4896 *bufa_ptr = bufa; 4897 } 4898 } 4899 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4900 PetscFunctionReturn(0); 4901 } 4902 4903 #undef __FUNCT__ 4904 #define __FUNCT__ "MatGetCommunicationStructs" 4905 /*@C 4906 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4907 4908 Not Collective 4909 4910 Input Parameters: 4911 . A - The matrix in mpiaij format 4912 4913 Output Parameter: 4914 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4915 . 
colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  /* borrowed references: the caller must NOT destroy these objects */
  if (lvec) *lvec                   = a->lvec;
  if (colmap) *colmap               = a->colmap;
  if (multScatter) *multScatter     = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

           n          p           p
        (     )    (     )     (     )
      m (  A  ) * n(  B  ) = m (  C  )
        (     )    (     )     (     )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  /* C already has the right layout (created by the symbolic phase), so transpose in place */
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
/* Creates the (dense) product matrix C = A*B with the correct parallel layout;
   values are computed later by MatMatMultNumeric_MPIDense_MPIAIJ(). */
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
/* Driver: symbolic phase (on MAT_INITIAL_MATRIX only), then the numeric product. */
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if \
defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/* Type constructor registered for MATMPIAIJ: allocates the Mat_MPIAIJ data,
   installs the function table, and composes the type-specific operations
   (factorizations, conversions, preallocation) on the object. */
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* register external-package factorizations available at configure time */
#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                     "MatGetFactor_mpiaij_spooles",
                                     MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                     "MatGetFactor_aij_mumps",
                                     MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                     "MatGetFactor_mpiaij_pastix",
                                     MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                     "MatGetFactor_mpiaij_superlu_dist",
                                     MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  /* type-specific operations dispatched by name */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                     "MatConvert_MPIAIJ_MPICSRPERM",
                                     MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                     "MatConvert_MPIAIJ_MPICRL",
                                     MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                     "MatConvert_MPIAIJ_MPISBAIJ",
                                     MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                     "MatMatMult_MPIDense_MPIAIJ",
                                     MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                     "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                     MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                     "MatMatMultNumeric_MPIDense_MPIAIJ",
                                     MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.
oi - row indices for "off-diagonal" portion of matrix 5158 . oj - column indices 5159 - oa - matrix values 5160 5161 Output Parameter: 5162 . mat - the matrix 5163 5164 Level: advanced 5165 5166 Notes: 5167 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5168 5169 The i and j indices are 0 based 5170 5171 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5172 5173 This sets local rows and cannot be used to set off-processor values. 5174 5175 You cannot later use MatSetValues() to change values in this matrix. 5176 5177 .keywords: matrix, aij, compressed row, sparse, parallel 5178 5179 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5180 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5181 @*/ 5182 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5183 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5184 { 5185 PetscErrorCode ierr; 5186 Mat_MPIAIJ *maij; 5187 5188 PetscFunctionBegin; 5189 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5190 if (i[0]) { 5191 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5192 } 5193 if (oi[0]) { 5194 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5195 } 5196 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5197 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5198 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5199 maij = (Mat_MPIAIJ*) (*mat)->data; 5200 maij->donotstash = PETSC_TRUE; 5201 (*mat)->preallocated = PETSC_TRUE; 5202 5203 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5204 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5205 ierr = 
PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5206 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5207 5208 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5209 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5210 5211 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5212 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5213 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5214 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5215 5216 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5217 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5218 PetscFunctionReturn(0); 5219 } 5220 5221 /* 5222 Special version for direct calls from Fortran 5223 */ 5224 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5225 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5226 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5227 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5228 #endif 5229 5230 /* Change these macros so can be used in void function */ 5231 #undef CHKERRQ 5232 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5233 #undef SETERRQ2 5234 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5235 #undef SETERRQ 5236 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5237 5238 EXTERN_C_BEGIN 5239 #undef __FUNCT__ 5240 #define __FUNCT__ "matsetvaluesmpiaij_" 5241 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5242 { 5243 Mat mat = *mmat; 5244 PetscInt m = *mm, n = *mn; 5245 InsertMode addv = *maddv; 5246 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5247 PetscScalar value; 5248 PetscErrorCode ierr; 5249 5250 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5251 if (mat->insertmode == NOT_SET_VALUES) { 5252 mat->insertmode = addv; 5253 } 5254 #if defined(PETSC_USE_DEBUG) 5255 else 
if (mat->insertmode != addv) { 5256 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5257 } 5258 #endif 5259 { 5260 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5261 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5262 PetscTruth roworiented = aij->roworiented; 5263 5264 /* Some Variables required in the macro */ 5265 Mat A = aij->A; 5266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5267 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5268 MatScalar *aa = a->a; 5269 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5270 Mat B = aij->B; 5271 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5272 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5273 MatScalar *ba = b->a; 5274 5275 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5276 PetscInt nonew = a->nonew; 5277 MatScalar *ap1,*ap2; 5278 5279 PetscFunctionBegin; 5280 for (i=0; i<m; i++) { 5281 if (im[i] < 0) continue; 5282 #if defined(PETSC_USE_DEBUG) 5283 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5284 #endif 5285 if (im[i] >= rstart && im[i] < rend) { 5286 row = im[i] - rstart; 5287 lastcol1 = -1; 5288 rp1 = aj + ai[row]; 5289 ap1 = aa + ai[row]; 5290 rmax1 = aimax[row]; 5291 nrow1 = ailen[row]; 5292 low1 = 0; 5293 high1 = nrow1; 5294 lastcol2 = -1; 5295 rp2 = bj + bi[row]; 5296 ap2 = ba + bi[row]; 5297 rmax2 = bimax[row]; 5298 nrow2 = bilen[row]; 5299 low2 = 0; 5300 high2 = nrow2; 5301 5302 for (j=0; j<n; j++) { 5303 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5304 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5305 if (in[j] >= cstart && in[j] < cend){ 5306 col = in[j] - cstart; 5307 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5308 } else if 
(in[j] < 0) continue; 5309 #if defined(PETSC_USE_DEBUG) 5310 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5311 #endif 5312 else { 5313 if (mat->was_assembled) { 5314 if (!aij->colmap) { 5315 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5316 } 5317 #if defined (PETSC_USE_CTABLE) 5318 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5319 col--; 5320 #else 5321 col = aij->colmap[in[j]] - 1; 5322 #endif 5323 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5324 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5325 col = in[j]; 5326 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5327 B = aij->B; 5328 b = (Mat_SeqAIJ*)B->data; 5329 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5330 rp2 = bj + bi[row]; 5331 ap2 = ba + bi[row]; 5332 rmax2 = bimax[row]; 5333 nrow2 = bilen[row]; 5334 low2 = 0; 5335 high2 = nrow2; 5336 bm = aij->B->rmap->n; 5337 ba = b->a; 5338 } 5339 } else col = in[j]; 5340 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5341 } 5342 } 5343 } else { 5344 if (!aij->donotstash) { 5345 if (roworiented) { 5346 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5347 } else { 5348 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5349 } 5350 } 5351 } 5352 }} 5353 PetscFunctionReturnVoid(); 5354 } 5355 EXTERN_C_END 5356 5357