#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
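      /*
         Added note (not part of the original source): ld[i] counts, for local row i, the
         number of entries whose global column index lies before this process's ownership
         range (column < rstart).  Together with olens[] this lets the MAT_REUSE_MATRIX
         branch below split a contiguous row of received values into the off-diagonal
         entries that precede the diagonal block, the diagonal-block entries, and the
         trailing off-diagonal entries.
      */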
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
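/*
   Illustrative usage sketch (not part of the original source): a caller that has the global
   matrix assembled as SeqAIJ on rank 0 might distribute it as

     Mat      gmat;              (SeqAIJ matrix, significant only on rank 0)
     Mat      dmat;              (resulting parallel AIJ matrix)
     PetscInt mlocal = ...;      (local row count chosen by the caller)
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);

   and later move over only new numerical values, reusing the nonzero structure:

     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/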
/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  processor has an order N integer array, but it is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
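/*
   Added note (not part of the original source): the two macros above form the inner loop of
   MatSetValues_MPIAIJ() below.  For a given (row,col,value) they do a short binary search
   (narrowing [low,high] while the gap is larger than 5) followed by a linear scan of the
   row's stored column indices rp1/rp2; an existing entry is added to or overwritten, and a
   missing entry is inserted by shifting the tail of the row, reallocating with
   MatSeqXAIJReallocateAIJ() when the row has no free slot.  The A variant works on the
   diagonal block and the B variant on the off-diagonal block.
*/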
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
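            /*
               Added note (not part of the original source): the global column in[j] is
               translated to a local column of the off-diagonal block B through aij->colmap
               (a PetscTable or an array, see CreateColmap_MPIAIJ_Private()).  A result of
               col < 0 means the column is not yet present in B's nonzero structure; if new
               nonzeros are allowed the matrix is disassembled below so that B can be rebuilt
               with the new column, otherwise the insertion macro drops the entry or raises
               an error depending on the MAT_NEW_NONZERO_* options.
            */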
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
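  /*
     Added note (not part of the original source): assembly begin checks that every process
     used the same InsertMode and then starts MatStashScatterBegin_Private(), which ships the
     stashed off-process entries (collected by MatSetValues_MPIAIJ() above) to the processes
     that own those rows; MatAssemblyEnd_MPIAIJ() below receives them and applies them with
     MatSetValues_MPIAIJ().
  */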
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
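  /*
     Added note (not part of the original source): nprocs[2*j] now holds the number of rows
     this process will send to rank j and nprocs[2*j+1] is 1 if any message goes to rank j
     at all, so nsends is the number of messages this process will post.  Below, PetscMaxSum()
     combines these counts across the communicator so that each rank learns how many messages
     it should expect (nrecvs) and the longest message it must be prepared to receive (nmax).
  */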
  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory and hence trigger many mallocs and
     slow things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(xmask);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecGetArray(b,&bb);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use){
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
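    /*
       Added note (not part of the original source): lmask now has a nonzero entry for every
       ghost column of B that corresponds to a zeroed global row, and l->lvec holds the ghost
       values of x.  The loops below walk the stored rows of the off-diagonal block B and,
       for every entry in a masked column, move its contribution to the right-hand side
       (bb[row] -= a_ij * x_j) before zeroing the entry, eliminating the zeroed columns from B.
    */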
    for (i=0; i<m; i++){
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          bb[*ridx] -= *aa*xx[*aj];
          *aa        = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          bb[i] -= *aa*xx[*aj];
          *aa    = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }

  ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecDestroy(lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
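/*
   Added note (not part of the original source): the transpose products above compute
   y = A_loc^T x + B_off^T x.  The B_off^T x piece lands in the ghost vector lvec and is
   shipped back to its owning processes with a SCATTER_REVERSE/ADD_VALUES scatter, while the
   local A_loc^T x product is done in between so that communication and computation overlap.
   When VecScatterGetMerged() reports that the begin and end phases are merged, the local
   product must be finished before VecScatterBegin() is called, which is why
   MatMultTranspose_MPIAIJ() has the two orderings.
*/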
/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
  if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII() */
      PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1424 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1425 if (iascii || isdraw || isbinary || issocket) { 1426 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1427 } else { 1428 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1429 } 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatSOR_MPIAIJ" 1435 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1436 { 1437 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1438 PetscErrorCode ierr; 1439 Vec bb1 = 0; 1440 PetscBool hasop; 1441 1442 PetscFunctionBegin; 1443 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1444 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1445 } 1446 1447 if (flag == SOR_APPLY_UPPER) { 1448 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1449 PetscFunctionReturn(0); 1450 } 1451 1452 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1453 if (flag & SOR_ZERO_INITIAL_GUESS) { 1454 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1455 its--; 1456 } 1457 1458 while (its--) { 1459 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1460 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1461 1462 /* update rhs: bb1 = bb - B*x */ 1463 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1464 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1465 1466 /* local sweep */ 1467 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1468 } 1469 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1470 if (flag & SOR_ZERO_INITIAL_GUESS) { 1471 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1472 its--; 1473 } 1474 while (its--) { 1475 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1476 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1477 1478 /* update rhs: bb1 = bb - B*x */ 1479 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1480 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1481 1482 /* local sweep */ 1483 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1484 } 1485 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1486 if (flag & SOR_ZERO_INITIAL_GUESS) { 1487 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1488 its--; 1489 } 1490 while (its--) { 1491 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1492 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1493 1494 /* update rhs: bb1 = bb - B*x */ 1495 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1496 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1497 1498 /* local sweep */ 1499 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1500 } 1501 } else if (flag & SOR_EISENSTAT) { 1502 Vec xx1; 1503 1504 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1505 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1506 1507 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1508 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1509 if (!mat->diag) { 1510 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1511 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1512 } 1513 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1514 if (hasop) { 1515 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1516 } else { 1517 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1518 } 1519 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1520 1521 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1522 1523 /* local sweep */ 1524 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1525 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1526 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1527 } else { 1528 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1529 } 1530 1531 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1532 PetscFunctionReturn(0); 1533 } 1534 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "MatPermute_MPIAIJ" 1537 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1538 { 1539 MPI_Comm comm,pcomm; 1540 PetscInt first,local_size,nrows; 1541 const PetscInt *rows; 1542 PetscMPIInt size; 1543 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1544 PetscErrorCode ierr; 1545 1546 PetscFunctionBegin; 1547 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1548 /* make a collective version of 'rowp' */ 1549 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1550 if (pcomm==comm) { 1551 crowp = rowp; 1552 } else { 1553 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1554 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1555 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 1556 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1557 } 1558 /* collect the global row permutation and invert it */ 1559 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1560 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1561 if (pcomm!=comm) { 1562 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1563 } 1564 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1565 /* get the local target indices */ 1566 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1567 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1568 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1569 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1570 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1571 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1572 /* the column permutation is so much easier; 1573 make a local version of 'colp' and invert it */ 1574 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1575 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1576 if (size==1) { 1577 lcolp = colp; 1578 } else { 1579 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1580 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1581 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 1582 } 1583 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1584 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1585 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1586 if (size>1) { 1587 ierr = 
ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1588 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1589 } 1590 /* now we just get the submatrix */ 1591 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1592 /* clean up */ 1593 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1594 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1595 PetscFunctionReturn(0); 1596 } 1597 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1600 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1601 { 1602 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1603 Mat A = mat->A,B = mat->B; 1604 PetscErrorCode ierr; 1605 PetscReal isend[5],irecv[5]; 1606 1607 PetscFunctionBegin; 1608 info->block_size = 1.0; 1609 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1610 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1611 isend[3] = info->memory; isend[4] = info->mallocs; 1612 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1613 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1614 isend[3] += info->memory; isend[4] += info->mallocs; 1615 if (flag == MAT_LOCAL) { 1616 info->nz_used = isend[0]; 1617 info->nz_allocated = isend[1]; 1618 info->nz_unneeded = isend[2]; 1619 info->memory = isend[3]; 1620 info->mallocs = isend[4]; 1621 } else if (flag == MAT_GLOBAL_MAX) { 1622 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1623 info->nz_used = irecv[0]; 1624 info->nz_allocated = irecv[1]; 1625 info->nz_unneeded = irecv[2]; 1626 info->memory = irecv[3]; 1627 info->mallocs = irecv[4]; 1628 } else if (flag == MAT_GLOBAL_SUM) { 1629 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1630 info->nz_used = irecv[0]; 1631 info->nz_allocated = irecv[1]; 1632 info->nz_unneeded = irecv[2]; 1633 info->memory = irecv[3]; 1634 info->mallocs = irecv[4]; 1635 } 1636 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1637 info->fill_ratio_needed = 0; 1638 info->factor_mallocs = 0; 1639 1640 PetscFunctionReturn(0); 1641 } 1642 1643 #undef __FUNCT__ 1644 #define __FUNCT__ "MatSetOption_MPIAIJ" 1645 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1646 { 1647 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1648 PetscErrorCode ierr; 1649 1650 PetscFunctionBegin; 1651 switch (op) { 1652 case MAT_NEW_NONZERO_LOCATIONS: 1653 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1654 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1655 case MAT_KEEP_NONZERO_PATTERN: 1656 case MAT_NEW_NONZERO_LOCATION_ERR: 1657 case MAT_USE_INODES: 1658 case MAT_IGNORE_ZERO_ENTRIES: 1659 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1660 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1661 break; 1662 case MAT_ROW_ORIENTED: 1663 a->roworiented = flg; 1664 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1665 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1666 break; 1667 case MAT_NEW_DIAGONALS: 1668 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1669 break; 1670 case MAT_IGNORE_OFF_PROC_ENTRIES: 1671 a->donotstash = PETSC_TRUE; 1672 break; 1673 case MAT_SPD: 1674 A->spd_set = PETSC_TRUE; 1675 A->spd = flg; 1676 if (flg) { 1677 A->symmetric = PETSC_TRUE; 1678 A->structurally_symmetric = PETSC_TRUE; 1679 A->symmetric_set = PETSC_TRUE; 1680 A->structurally_symmetric_set = PETSC_TRUE; 1681 } 1682 break; 1683 case MAT_SYMMETRIC: 1684 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1685 break; 1686 case 
MAT_STRUCTURALLY_SYMMETRIC: 1687 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1688 break; 1689 case MAT_HERMITIAN: 1690 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1691 break; 1692 case MAT_SYMMETRY_ETERNAL: 1693 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1694 break; 1695 default: 1696 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1697 } 1698 PetscFunctionReturn(0); 1699 } 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "MatGetRow_MPIAIJ" 1703 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1704 { 1705 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1706 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1707 PetscErrorCode ierr; 1708 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1709 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1710 PetscInt *cmap,*idx_p; 1711 1712 PetscFunctionBegin; 1713 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1714 mat->getrowactive = PETSC_TRUE; 1715 1716 if (!mat->rowvalues && (idx || v)) { 1717 /* 1718 allocate enough space to hold information from the longest row. 1719 */ 1720 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1721 PetscInt max = 1,tmp; 1722 for (i=0; i<matin->rmap->n; i++) { 1723 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1724 if (max < tmp) { max = tmp; } 1725 } 1726 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1727 } 1728 1729 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1730 lrow = row - rstart; 1731 1732 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1733 if (!v) {pvA = 0; pvB = 0;} 1734 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1735 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1736 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1737 nztot = nzA + nzB; 1738 1739 cmap = mat->garray; 1740 if (v || idx) { 1741 if (nztot) { 1742 /* Sort by increasing column numbers, assuming A and B already sorted */ 1743 PetscInt imark = -1; 1744 if (v) { 1745 *v = v_p = mat->rowvalues; 1746 for (i=0; i<nzB; i++) { 1747 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1748 else break; 1749 } 1750 imark = i; 1751 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1752 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1753 } 1754 if (idx) { 1755 *idx = idx_p = mat->rowindices; 1756 if (imark > -1) { 1757 for (i=0; i<imark; i++) { 1758 idx_p[i] = cmap[cworkB[i]]; 1759 } 1760 } else { 1761 for (i=0; i<nzB; i++) { 1762 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1763 else break; 1764 } 1765 imark = i; 1766 } 1767 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1768 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1769 } 1770 } else { 1771 if (idx) *idx = 0; 1772 if (v) *v = 0; 1773 } 1774 } 1775 *nz = nztot; 1776 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1777 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1778 PetscFunctionReturn(0); 1779 } 1780 1781 #undef __FUNCT__ 1782 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1783 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1784 { 1785 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1786 1787 PetscFunctionBegin; 1788 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must 
be called first"); 1789 aij->getrowactive = PETSC_FALSE; 1790 PetscFunctionReturn(0); 1791 } 1792 1793 #undef __FUNCT__ 1794 #define __FUNCT__ "MatNorm_MPIAIJ" 1795 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1796 { 1797 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1798 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1799 PetscErrorCode ierr; 1800 PetscInt i,j,cstart = mat->cmap->rstart; 1801 PetscReal sum = 0.0; 1802 MatScalar *v; 1803 1804 PetscFunctionBegin; 1805 if (aij->size == 1) { 1806 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1807 } else { 1808 if (type == NORM_FROBENIUS) { 1809 v = amat->a; 1810 for (i=0; i<amat->nz; i++) { 1811 #if defined(PETSC_USE_COMPLEX) 1812 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1813 #else 1814 sum += (*v)*(*v); v++; 1815 #endif 1816 } 1817 v = bmat->a; 1818 for (i=0; i<bmat->nz; i++) { 1819 #if defined(PETSC_USE_COMPLEX) 1820 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1821 #else 1822 sum += (*v)*(*v); v++; 1823 #endif 1824 } 1825 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1826 *norm = sqrt(*norm); 1827 } else if (type == NORM_1) { /* max column norm */ 1828 PetscReal *tmp,*tmp2; 1829 PetscInt *jj,*garray = aij->garray; 1830 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1831 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1832 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1833 *norm = 0.0; 1834 v = amat->a; jj = amat->j; 1835 for (j=0; j<amat->nz; j++) { 1836 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1837 } 1838 v = bmat->a; jj = bmat->j; 1839 for (j=0; j<bmat->nz; j++) { 1840 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1841 } 1842 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1843 for (j=0; j<mat->cmap->N; j++) { 1844 if (tmp2[j] > *norm) *norm = tmp2[j]; 1845 } 1846 ierr = PetscFree(tmp);CHKERRQ(ierr); 1847 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1848 } else if (type == NORM_INFINITY) { /* max row norm */ 1849 PetscReal ntemp = 0.0; 1850 for (j=0; j<aij->A->rmap->n; j++) { 1851 v = amat->a + amat->i[j]; 1852 sum = 0.0; 1853 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1854 sum += PetscAbsScalar(*v); v++; 1855 } 1856 v = bmat->a + bmat->i[j]; 1857 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1858 sum += PetscAbsScalar(*v); v++; 1859 } 1860 if (sum > ntemp) ntemp = sum; 1861 } 1862 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1863 } else { 1864 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1865 } 1866 } 1867 PetscFunctionReturn(0); 1868 } 1869 1870 #undef __FUNCT__ 1871 #define __FUNCT__ "MatTranspose_MPIAIJ" 1872 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1873 { 1874 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1875 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1876 PetscErrorCode ierr; 1877 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1878 PetscInt cstart=A->cmap->rstart,ncol; 1879 Mat B; 1880 MatScalar *array; 1881 1882 PetscFunctionBegin; 1883 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1884 1885 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1886 ai = Aloc->i; aj = Aloc->j; 1887 bi = Bloc->i; bj = Bloc->j; 1888 if (reuse 
== MAT_INITIAL_MATRIX || *matout == A) { 1889 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1890 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1891 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1892 for (i=0; i<ai[ma]; i++){ 1893 d_nnz[aj[i]] ++; 1894 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1895 } 1896 1897 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1898 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1899 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1900 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1901 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1902 } else { 1903 B = *matout; 1904 } 1905 1906 /* copy over the A part */ 1907 array = Aloc->a; 1908 row = A->rmap->rstart; 1909 for (i=0; i<ma; i++) { 1910 ncol = ai[i+1]-ai[i]; 1911 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1912 row++; array += ncol; aj += ncol; 1913 } 1914 aj = Aloc->j; 1915 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1916 1917 /* copy over the B part */ 1918 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1919 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1920 array = Bloc->a; 1921 row = A->rmap->rstart; 1922 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1923 cols_tmp = cols; 1924 for (i=0; i<mb; i++) { 1925 ncol = bi[i+1]-bi[i]; 1926 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1927 row++; array += ncol; cols_tmp += ncol; 1928 } 1929 ierr = PetscFree(cols);CHKERRQ(ierr); 1930 1931 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1932 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1933 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1934 *matout = B; 1935 } else { 1936 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1937 } 1938 PetscFunctionReturn(0); 1939 } 1940 1941 #undef __FUNCT__ 1942 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1943 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1944 { 1945 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1946 Mat a = aij->A,b = aij->B; 1947 PetscErrorCode ierr; 1948 PetscInt s1,s2,s3; 1949 1950 PetscFunctionBegin; 1951 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1952 if (rr) { 1953 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1954 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1955 /* Overlap communication with computation. 
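   The forward scatter of rr into aij->lvec is started here; the diagonal block is scaled while those messages are in transit, and the scatter is completed below just before the off-diagonal block is right-scaled with the ghosted values.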
*/ 1956 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1957 } 1958 if (ll) { 1959 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1960 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1961 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1962 } 1963 /* scale the diagonal block */ 1964 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1965 1966 if (rr) { 1967 /* Do a scatter end and then right scale the off-diagonal block */ 1968 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1969 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1970 } 1971 1972 PetscFunctionReturn(0); 1973 } 1974 1975 #undef __FUNCT__ 1976 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1977 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1978 { 1979 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1980 PetscErrorCode ierr; 1981 1982 PetscFunctionBegin; 1983 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1984 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1985 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1986 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1987 PetscFunctionReturn(0); 1988 } 1989 #undef __FUNCT__ 1990 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1991 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1992 { 1993 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1994 PetscErrorCode ierr; 1995 1996 PetscFunctionBegin; 1997 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1998 PetscFunctionReturn(0); 1999 } 2000 2001 #undef __FUNCT__ 2002 #define __FUNCT__ "MatEqual_MPIAIJ" 2003 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2004 { 2005 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2006 Mat a,b,c,d; 2007 PetscBool flg; 2008 PetscErrorCode ierr; 2009 2010 PetscFunctionBegin; 2011 a = matA->A; b = matA->B; 2012 c = matB->A; d = matB->B; 2013 2014 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2015 if (flg) { 2016 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2017 } 2018 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "MatCopy_MPIAIJ" 2024 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2025 { 2026 PetscErrorCode ierr; 2027 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2028 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2029 2030 PetscFunctionBegin; 2031 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2032 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2033 /* because of the column compression in the off-processor part of the matrix a->B, 2034 the number of columns in a->B and b->B may be different, hence we cannot call 2035 the MatCopy() directly on the two parts. 
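   (The off-process block a->B stores only those global columns that actually contain nonzeros, renumbered through garray, so two matrices with an identical global nonzero pattern can still have off-diagonal blocks of different widths.)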
If need be, we can provide a more 2036 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2037 then copying the submatrices */ 2038 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2039 } else { 2040 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2041 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2042 } 2043 PetscFunctionReturn(0); 2044 } 2045 2046 #undef __FUNCT__ 2047 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2048 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2049 { 2050 PetscErrorCode ierr; 2051 2052 PetscFunctionBegin; 2053 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2054 PetscFunctionReturn(0); 2055 } 2056 2057 #undef __FUNCT__ 2058 #define __FUNCT__ "MatAXPY_MPIAIJ" 2059 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2060 { 2061 PetscErrorCode ierr; 2062 PetscInt i; 2063 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2064 PetscBLASInt bnz,one=1; 2065 Mat_SeqAIJ *x,*y; 2066 2067 PetscFunctionBegin; 2068 if (str == SAME_NONZERO_PATTERN) { 2069 PetscScalar alpha = a; 2070 x = (Mat_SeqAIJ *)xx->A->data; 2071 y = (Mat_SeqAIJ *)yy->A->data; 2072 bnz = PetscBLASIntCast(x->nz); 2073 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2074 x = (Mat_SeqAIJ *)xx->B->data; 2075 y = (Mat_SeqAIJ *)yy->B->data; 2076 bnz = PetscBLASIntCast(x->nz); 2077 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2078 } else if (str == SUBSET_NONZERO_PATTERN) { 2079 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2080 2081 x = (Mat_SeqAIJ *)xx->B->data; 2082 y = (Mat_SeqAIJ *)yy->B->data; 2083 if (y->xtoy && y->XtoY != xx->B) { 2084 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2085 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2086 } 2087 if (!y->xtoy) { /* get xtoy */ 2088 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2089 y->XtoY = xx->B; 2090 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2091 } 2092 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2093 } else { 2094 Mat B; 2095 PetscInt *nnz_d,*nnz_o; 2096 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2097 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2098 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2099 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2100 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2101 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2102 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2103 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2104 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 2105 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2106 ierr = MatHeaderReplace(Y,B); 2107 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2108 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2109 } 2110 PetscFunctionReturn(0); 2111 } 2112 2113 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2114 2115 #undef __FUNCT__ 2116 #define __FUNCT__ "MatConjugate_MPIAIJ" 2117 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2118 { 2119 #if defined(PETSC_USE_COMPLEX) 2120 PetscErrorCode ierr; 2121 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2122 2123 PetscFunctionBegin; 2124 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2125 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2126 #else 2127 PetscFunctionBegin; 2128 #endif 2129 PetscFunctionReturn(0); 
2130 } 2131 2132 #undef __FUNCT__ 2133 #define __FUNCT__ "MatRealPart_MPIAIJ" 2134 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2135 { 2136 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2137 PetscErrorCode ierr; 2138 2139 PetscFunctionBegin; 2140 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2141 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 #undef __FUNCT__ 2146 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2147 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2148 { 2149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2150 PetscErrorCode ierr; 2151 2152 PetscFunctionBegin; 2153 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2154 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2155 PetscFunctionReturn(0); 2156 } 2157 2158 #ifdef PETSC_HAVE_PBGL 2159 2160 #include <boost/parallel/mpi/bsp_process_group.hpp> 2161 #include <boost/graph/distributed/ilu_default_graph.hpp> 2162 #include <boost/graph/distributed/ilu_0_block.hpp> 2163 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2164 #include <boost/graph/distributed/petsc/interface.hpp> 2165 #include <boost/multi_array.hpp> 2166 #include <boost/parallel/distributed_property_map.hpp> 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2170 /* 2171 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2172 */ 2173 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2174 { 2175 namespace petsc = boost::distributed::petsc; 2176 2177 namespace graph_dist = boost::graph::distributed; 2178 using boost::graph::distributed::ilu_default::process_group_type; 2179 using boost::graph::ilu_permuted; 2180 2181 PetscBool row_identity, col_identity; 2182 PetscContainer c; 2183 PetscInt m, n, M, N; 2184 PetscErrorCode ierr; 2185 2186 PetscFunctionBegin; 2187 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2188 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2189 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2190 if (!row_identity || !col_identity) { 2191 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2192 } 2193 2194 process_group_type pg; 2195 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2196 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2197 lgraph_type& level_graph = *lgraph_p; 2198 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2199 2200 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2201 ilu_permuted(level_graph); 2202 2203 /* put together the new matrix */ 2204 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2205 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2206 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2207 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2208 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2209 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2210 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2211 2212 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 2213 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 2214 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 2215 PetscFunctionReturn(0); 2216 } 2217 2218 #undef __FUNCT__ 2219 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2220 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2221
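/* The PBGL factorization is carried out in MatILUFactorSymbolic_MPIAIJ() above, so the numeric phase has nothing left to do. */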
{ 2222 PetscFunctionBegin; 2223 PetscFunctionReturn(0); 2224 } 2225 2226 #undef __FUNCT__ 2227 #define __FUNCT__ "MatSolve_MPIAIJ" 2228 /* 2229 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2230 */ 2231 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2232 { 2233 namespace graph_dist = boost::graph::distributed; 2234 2235 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2236 lgraph_type* lgraph_p; 2237 PetscContainer c; 2238 PetscErrorCode ierr; 2239 2240 PetscFunctionBegin; 2241 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2242 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2243 ierr = VecCopy(b, x);CHKERRQ(ierr); 2244 2245 PetscScalar* array_x; 2246 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2247 PetscInt sx; 2248 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2249 2250 PetscScalar* array_b; 2251 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2252 PetscInt sb; 2253 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2254 2255 lgraph_type& level_graph = *lgraph_p; 2256 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2257 2258 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2259 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2260 ref_x(array_x, boost::extents[num_vertices(graph)]); 2261 2262 typedef boost::iterator_property_map<array_ref_type::iterator, 2263 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2264 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2265 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2266 2267 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2268 2269 PetscFunctionReturn(0); 2270 } 2271 #endif 2272 2273 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2274 PetscInt nzlocal,nsends,nrecvs; 2275 PetscMPIInt *send_rank,*recv_rank; 2276 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2277 PetscScalar *sbuf_a,**rbuf_a; 2278 PetscErrorCode (*MatDestroy)(Mat); 2279 } Mat_Redundant; 2280 2281 #undef __FUNCT__ 2282 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2283 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2284 { 2285 PetscErrorCode ierr; 2286 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2287 PetscInt i; 2288 2289 PetscFunctionBegin; 2290 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2291 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2292 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2293 for (i=0; i<redund->nrecvs; i++){ 2294 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2295 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2296 } 2297 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2298 ierr = PetscFree(redund);CHKERRQ(ierr); 2299 PetscFunctionReturn(0); 2300 } 2301 2302 #undef __FUNCT__ 2303 #define __FUNCT__ "MatDestroy_MatRedundant" 2304 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2305 { 2306 PetscErrorCode ierr; 2307 PetscContainer container; 2308 Mat_Redundant *redund=PETSC_NULL; 2309 2310 PetscFunctionBegin; 2311 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2312 if (container) { 2313 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2314 } else { 2315 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2316 } 2317 A->ops->destroy = redund->MatDestroy; 2318 ierr = 
PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2319 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2320 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2321 PetscFunctionReturn(0); 2322 } 2323 2324 #undef __FUNCT__ 2325 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2326 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2327 { 2328 PetscMPIInt rank,size; 2329 MPI_Comm comm=((PetscObject)mat)->comm; 2330 PetscErrorCode ierr; 2331 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2332 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2333 PetscInt *rowrange=mat->rmap->range; 2334 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2335 Mat A=aij->A,B=aij->B,C=*matredundant; 2336 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2337 PetscScalar *sbuf_a; 2338 PetscInt nzlocal=a->nz+b->nz; 2339 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2340 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2341 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2342 MatScalar *aworkA,*aworkB; 2343 PetscScalar *vals; 2344 PetscMPIInt tag1,tag2,tag3,imdex; 2345 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2346 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2347 MPI_Status recv_status,*send_status; 2348 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2349 PetscInt **rbuf_j=PETSC_NULL; 2350 PetscScalar **rbuf_a=PETSC_NULL; 2351 Mat_Redundant *redund=PETSC_NULL; 2352 PetscContainer container; 2353 2354 PetscFunctionBegin; 2355 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2356 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2357 2358 if (reuse == MAT_REUSE_MATRIX) { 2359 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2360 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2361 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2362 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2363 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2364 if (container) { 2365 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2366 } else { 2367 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2368 } 2369 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2370 2371 nsends = redund->nsends; 2372 nrecvs = redund->nrecvs; 2373 send_rank = redund->send_rank; 2374 recv_rank = redund->recv_rank; 2375 sbuf_nz = redund->sbuf_nz; 2376 rbuf_nz = redund->rbuf_nz; 2377 sbuf_j = redund->sbuf_j; 2378 sbuf_a = redund->sbuf_a; 2379 rbuf_j = redund->rbuf_j; 2380 rbuf_a = redund->rbuf_a; 2381 } 2382 2383 if (reuse == MAT_INITIAL_MATRIX){ 2384 PetscMPIInt subrank,subsize; 2385 PetscInt nleftover,np_subcomm; 2386 /* get the destination processors' id send_rank, nsends and nrecvs */ 2387 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2388 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2389 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2390 np_subcomm = size/nsubcomm; 2391 nleftover = size - nsubcomm*np_subcomm; 2392 nsends = 0; nrecvs = 0; 2393 for (i=0; i<size; i++){ /* i=rank*/ 2394 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2395 send_rank[nsends] = i; nsends++; 2396 recv_rank[nrecvs++] = i; 2397 } 2398 } 2399 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2400 i = size-nleftover-1; 2401 j = 0; 2402 while (j < nsubcomm - nleftover){ 2403 send_rank[nsends++] = i; 2404 i--; j++; 2405 } 2406 } 2407 2408 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2409 for (i=0; i<nleftover; i++){ 2410 recv_rank[nrecvs++] = size-nleftover+i; 2411 } 2412 } 2413 2414 /* allocate sbuf_j, sbuf_a */ 2415 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2416 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2417 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2418 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2419 2420 /* copy mat's local entries into the buffers */ 2421 if (reuse == MAT_INITIAL_MATRIX){ 2422 rownz_max = 0; 2423 rptr = sbuf_j; 2424 cols = sbuf_j + rend-rstart + 1; 2425 vals = sbuf_a; 2426 rptr[0] = 0; 2427 for (i=0; i<rend-rstart; i++){ 2428 row = i + rstart; 2429 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2430 ncols = nzA + nzB; 2431 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2432 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2433 /* load the column indices for this row into cols */ 2434 lwrite = 0; 2435 for (l=0; l<nzB; l++) { 2436 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2437 vals[lwrite] = aworkB[l]; 2438 cols[lwrite++] = ctmp; 2439 } 2440 } 2441 for (l=0; l<nzA; l++){ 2442 vals[lwrite] = aworkA[l]; 2443 cols[lwrite++] = cstart + cworkA[l]; 2444 } 2445 for (l=0; l<nzB; l++) { 2446 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2447 vals[lwrite] = aworkB[l]; 2448 cols[lwrite++] = ctmp; 2449 } 2450 } 2451 vals += ncols; 2452 cols += ncols; 2453 rptr[i+1] = rptr[i] + ncols; 2454 if (rownz_max < ncols) rownz_max = ncols; 2455 } 2456 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2457 } else { /* only copy matrix values into sbuf_a */ 2458 rptr = sbuf_j; 2459 vals = sbuf_a; 2460 rptr[0] = 0; 2461 for (i=0; i<rend-rstart; i++){ 2462 row = i + rstart; 2463 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2464 ncols = nzA + nzB; 2465 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2466 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2467 lwrite = 0; 2468 for (l=0; l<nzB; l++) { 2469 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2470 } 2471 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2472 for 
(l=0; l<nzB; l++) { 2473 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2474 } 2475 vals += ncols; 2476 rptr[i+1] = rptr[i] + ncols; 2477 } 2478 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2479 2480 /* send nzlocal to others, and recv other's nzlocal */ 2481 /*--------------------------------------------------*/ 2482 if (reuse == MAT_INITIAL_MATRIX){ 2483 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2484 s_waits2 = s_waits3 + nsends; 2485 s_waits1 = s_waits2 + nsends; 2486 r_waits1 = s_waits1 + nsends; 2487 r_waits2 = r_waits1 + nrecvs; 2488 r_waits3 = r_waits2 + nrecvs; 2489 } else { 2490 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2491 r_waits3 = s_waits3 + nsends; 2492 } 2493 2494 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2495 if (reuse == MAT_INITIAL_MATRIX){ 2496 /* get new tags to keep the communication clean */ 2497 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2498 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2499 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2500 2501 /* post receives of other's nzlocal */ 2502 for (i=0; i<nrecvs; i++){ 2503 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2504 } 2505 /* send nzlocal to others */ 2506 for (i=0; i<nsends; i++){ 2507 sbuf_nz[i] = nzlocal; 2508 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2509 } 2510 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2511 count = nrecvs; 2512 while (count) { 2513 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2514 recv_rank[imdex] = recv_status.MPI_SOURCE; 2515 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2516 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2517 2518 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2519 rbuf_nz[imdex] += i + 2; 2520 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2521 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2522 count--; 2523 } 2524 /* wait on sends of nzlocal */ 2525 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2526 /* send mat->i,j to others, and recv from other's */ 2527 /*------------------------------------------------*/ 2528 for (i=0; i<nsends; i++){ 2529 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2530 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2531 } 2532 /* wait on receives of mat->i,j */ 2533 /*------------------------------*/ 2534 count = nrecvs; 2535 while (count) { 2536 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2537 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2538 count--; 2539 } 2540 /* wait on sends of mat->i,j */ 2541 /*---------------------------*/ 2542 if (nsends) { 2543 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2544 } 2545 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2546 2547 /* post receives, send and receive mat->a */ 2548 
/*----------------------------------------*/ 2549 for (imdex=0; imdex<nrecvs; imdex++) { 2550 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2551 } 2552 for (i=0; i<nsends; i++){ 2553 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2554 } 2555 count = nrecvs; 2556 while (count) { 2557 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2558 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2559 count--; 2560 } 2561 if (nsends) { 2562 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2563 } 2564 2565 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2566 2567 /* create redundant matrix */ 2568 /*-------------------------*/ 2569 if (reuse == MAT_INITIAL_MATRIX){ 2570 /* compute rownz_max for preallocation */ 2571 for (imdex=0; imdex<nrecvs; imdex++){ 2572 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2573 rptr = rbuf_j[imdex]; 2574 for (i=0; i<j; i++){ 2575 ncols = rptr[i+1] - rptr[i]; 2576 if (rownz_max < ncols) rownz_max = ncols; 2577 } 2578 } 2579 2580 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2581 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2582 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2583 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2584 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2585 } else { 2586 C = *matredundant; 2587 } 2588 2589 /* insert local matrix entries */ 2590 rptr = sbuf_j; 2591 cols = sbuf_j + rend-rstart + 1; 2592 vals = sbuf_a; 2593 for (i=0; i<rend-rstart; i++){ 2594 row = i + rstart; 2595 ncols = rptr[i+1] - rptr[i]; 2596 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2597 vals += ncols; 2598 cols += ncols; 2599 } 2600 /* insert received matrix entries */ 2601 for (imdex=0; imdex<nrecvs; imdex++){ 2602 rstart = rowrange[recv_rank[imdex]]; 2603 rend = rowrange[recv_rank[imdex]+1]; 2604 rptr = rbuf_j[imdex]; 2605 cols = rbuf_j[imdex] + rend-rstart + 1; 2606 vals = rbuf_a[imdex]; 2607 for (i=0; i<rend-rstart; i++){ 2608 row = i + rstart; 2609 ncols = rptr[i+1] - rptr[i]; 2610 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2611 vals += ncols; 2612 cols += ncols; 2613 } 2614 } 2615 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2616 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2617 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2618 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2619 if (reuse == MAT_INITIAL_MATRIX){ 2620 PetscContainer container; 2621 *matredundant = C; 2622 /* create a supporting struct and attach it to C for reuse */ 2623 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2624 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2625 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2626 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2627 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2628 2629 redund->nzlocal = nzlocal; 2630 redund->nsends = nsends; 2631 redund->nrecvs = nrecvs; 2632 redund->send_rank = send_rank; 2633 redund->recv_rank = recv_rank; 
2634 redund->sbuf_nz = sbuf_nz; 2635 redund->rbuf_nz = rbuf_nz; 2636 redund->sbuf_j = sbuf_j; 2637 redund->sbuf_a = sbuf_a; 2638 redund->rbuf_j = rbuf_j; 2639 redund->rbuf_a = rbuf_a; 2640 2641 redund->MatDestroy = C->ops->destroy; 2642 C->ops->destroy = MatDestroy_MatRedundant; 2643 } 2644 PetscFunctionReturn(0); 2645 } 2646 2647 #undef __FUNCT__ 2648 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2649 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2650 { 2651 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2652 PetscErrorCode ierr; 2653 PetscInt i,*idxb = 0; 2654 PetscScalar *va,*vb; 2655 Vec vtmp; 2656 2657 PetscFunctionBegin; 2658 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2659 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2660 if (idx) { 2661 for (i=0; i<A->rmap->n; i++) { 2662 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2663 } 2664 } 2665 2666 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2667 if (idx) { 2668 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2669 } 2670 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2671 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2672 2673 for (i=0; i<A->rmap->n; i++){ 2674 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2675 va[i] = vb[i]; 2676 if (idx) idx[i] = a->garray[idxb[i]]; 2677 } 2678 } 2679 2680 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2681 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2682 if (idxb) { 2683 ierr = PetscFree(idxb);CHKERRQ(ierr); 2684 } 2685 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2686 PetscFunctionReturn(0); 2687 } 2688 2689 #undef __FUNCT__ 2690 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2691 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2692 { 2693 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2694 PetscErrorCode ierr; 2695 PetscInt i,*idxb = 0; 2696 PetscScalar *va,*vb; 2697 Vec vtmp; 2698 2699 PetscFunctionBegin; 2700 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2701 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2702 if (idx) { 2703 for (i=0; i<A->rmap->n; i++) { 2704 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2705 } 2706 } 2707 2708 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2709 if (idx) { 2710 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2711 } 2712 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2713 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2714 2715 for (i=0; i<A->rmap->n; i++){ 2716 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2717 va[i] = vb[i]; 2718 if (idx) idx[i] = a->garray[idxb[i]]; 2719 } 2720 } 2721 2722 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2723 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2724 if (idxb) { 2725 ierr = PetscFree(idxb);CHKERRQ(ierr); 2726 } 2727 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2728 PetscFunctionReturn(0); 2729 } 2730 2731 #undef __FUNCT__ 2732 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2733 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2734 { 2735 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2736 PetscInt n = A->rmap->n; 2737 PetscInt cstart = A->cmap->rstart; 2738 PetscInt *cmap = mat->garray; 2739 PetscInt *diagIdx, *offdiagIdx; 2740 Vec diagV, offdiagV; 2741 PetscScalar *a, *diagA, *offdiagA; 2742 PetscInt r; 2743 PetscErrorCode ierr; 2744 2745 PetscFunctionBegin; 2746 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2747 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2748 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2749 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2750 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2751 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2752 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2753 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2754 for(r = 0; r < n; ++r) { 2755 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2756 a[r] = diagA[r]; 2757 idx[r] = cstart + diagIdx[r]; 2758 } else { 2759 a[r] = offdiagA[r]; 2760 idx[r] = cmap[offdiagIdx[r]]; 2761 } 2762 } 2763 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2764 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2765 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2766 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2767 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2768 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2769 PetscFunctionReturn(0); 2770 } 2771 2772 #undef __FUNCT__ 2773 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2774 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2775 { 2776 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2777 PetscInt n = A->rmap->n; 2778 PetscInt cstart = A->cmap->rstart; 2779 PetscInt *cmap = mat->garray; 2780 PetscInt *diagIdx, *offdiagIdx; 2781 Vec diagV, offdiagV; 2782 PetscScalar *a, *diagA, *offdiagA; 2783 PetscInt r; 2784 PetscErrorCode ierr; 2785 2786 PetscFunctionBegin; 2787 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2788 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2789 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2790 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2791 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2792 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2793 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2794 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2795 for(r = 0; r < n; ++r) { 2796 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2797 a[r] = diagA[r]; 2798 idx[r] = cstart + diagIdx[r]; 2799 } else { 2800 a[r] = offdiagA[r]; 2801 idx[r] = cmap[offdiagIdx[r]]; 2802 } 2803 } 2804 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2805 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2806 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2807 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2808 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2809 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2810 PetscFunctionReturn(0); 2811 } 2812 2813 #undef __FUNCT__ 2814 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2815 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2816 { 2817 PetscErrorCode ierr; 2818 Mat *dummy; 2819 2820 PetscFunctionBegin; 2821 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2822 *newmat = *dummy; 2823 ierr = PetscFree(dummy);CHKERRQ(ierr); 2824 PetscFunctionReturn(0); 2825 } 2826 2827 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2828 /* -------------------------------------------------------------------*/ 2829 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2830 MatGetRow_MPIAIJ, 2831 MatRestoreRow_MPIAIJ, 2832 MatMult_MPIAIJ, 2833 /* 4*/ MatMultAdd_MPIAIJ, 2834 MatMultTranspose_MPIAIJ, 2835 MatMultTransposeAdd_MPIAIJ, 2836 #ifdef PETSC_HAVE_PBGL 2837 MatSolve_MPIAIJ, 2838 #else 2839 0, 2840 #endif 2841 0, 2842 0, 2843 /*10*/ 0, 2844 0, 2845 0, 2846 MatSOR_MPIAIJ, 2847 
MatTranspose_MPIAIJ, 2848 /*15*/ MatGetInfo_MPIAIJ, 2849 MatEqual_MPIAIJ, 2850 MatGetDiagonal_MPIAIJ, 2851 MatDiagonalScale_MPIAIJ, 2852 MatNorm_MPIAIJ, 2853 /*20*/ MatAssemblyBegin_MPIAIJ, 2854 MatAssemblyEnd_MPIAIJ, 2855 MatSetOption_MPIAIJ, 2856 MatZeroEntries_MPIAIJ, 2857 /*24*/ MatZeroRows_MPIAIJ, 2858 0, 2859 #ifdef PETSC_HAVE_PBGL 2860 0, 2861 #else 2862 0, 2863 #endif 2864 0, 2865 0, 2866 /*29*/ MatSetUpPreallocation_MPIAIJ, 2867 #ifdef PETSC_HAVE_PBGL 2868 0, 2869 #else 2870 0, 2871 #endif 2872 0, 2873 0, 2874 0, 2875 /*34*/ MatDuplicate_MPIAIJ, 2876 0, 2877 0, 2878 0, 2879 0, 2880 /*39*/ MatAXPY_MPIAIJ, 2881 MatGetSubMatrices_MPIAIJ, 2882 MatIncreaseOverlap_MPIAIJ, 2883 MatGetValues_MPIAIJ, 2884 MatCopy_MPIAIJ, 2885 /*44*/ MatGetRowMax_MPIAIJ, 2886 MatScale_MPIAIJ, 2887 0, 2888 0, 2889 MatZeroRowsColumns_MPIAIJ, 2890 /*49*/ MatSetBlockSize_MPIAIJ, 2891 0, 2892 0, 2893 0, 2894 0, 2895 /*54*/ MatFDColoringCreate_MPIAIJ, 2896 0, 2897 MatSetUnfactored_MPIAIJ, 2898 MatPermute_MPIAIJ, 2899 0, 2900 /*59*/ MatGetSubMatrix_MPIAIJ, 2901 MatDestroy_MPIAIJ, 2902 MatView_MPIAIJ, 2903 0, 2904 0, 2905 /*64*/ 0, 2906 0, 2907 0, 2908 0, 2909 0, 2910 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2911 MatGetRowMinAbs_MPIAIJ, 2912 0, 2913 MatSetColoring_MPIAIJ, 2914 #if defined(PETSC_HAVE_ADIC) 2915 MatSetValuesAdic_MPIAIJ, 2916 #else 2917 0, 2918 #endif 2919 MatSetValuesAdifor_MPIAIJ, 2920 /*75*/ MatFDColoringApply_AIJ, 2921 0, 2922 0, 2923 0, 2924 0, 2925 /*80*/ 0, 2926 0, 2927 0, 2928 /*83*/ MatLoad_MPIAIJ, 2929 0, 2930 0, 2931 0, 2932 0, 2933 0, 2934 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2935 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2936 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2937 MatPtAP_Basic, 2938 MatPtAPSymbolic_MPIAIJ, 2939 /*94*/ MatPtAPNumeric_MPIAIJ, 2940 0, 2941 0, 2942 0, 2943 0, 2944 /*99*/ 0, 2945 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2946 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2947 MatConjugate_MPIAIJ, 2948 0, 2949 /*104*/MatSetValuesRow_MPIAIJ, 2950 MatRealPart_MPIAIJ, 2951 MatImaginaryPart_MPIAIJ, 2952 0, 2953 0, 2954 /*109*/0, 2955 MatGetRedundantMatrix_MPIAIJ, 2956 MatGetRowMin_MPIAIJ, 2957 0, 2958 0, 2959 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2960 0, 2961 0, 2962 0, 2963 0, 2964 /*119*/0, 2965 0, 2966 0, 2967 0, 2968 MatGetMultiProcBlock_MPIAIJ, 2969 /*124*/0, 2970 0, 2971 0, 2972 0, 2973 MatGetSubMatricesParallel_MPIAIJ 2974 }; 2975 2976 /* ----------------------------------------------------------------------------------------*/ 2977 2978 EXTERN_C_BEGIN 2979 #undef __FUNCT__ 2980 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2981 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2982 { 2983 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2984 PetscErrorCode ierr; 2985 2986 PetscFunctionBegin; 2987 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2988 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2989 PetscFunctionReturn(0); 2990 } 2991 EXTERN_C_END 2992 2993 EXTERN_C_BEGIN 2994 #undef __FUNCT__ 2995 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2996 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2997 { 2998 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2999 PetscErrorCode ierr; 3000 3001 PetscFunctionBegin; 3002 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3003 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3004 PetscFunctionReturn(0); 3005 } 3006 EXTERN_C_END 3007 3008 EXTERN_C_BEGIN 3009 #undef __FUNCT__ 3010 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3011 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3012 { 3013 Mat_MPIAIJ *b; 3014 
PetscErrorCode ierr; 3015 PetscInt i; 3016 3017 PetscFunctionBegin; 3018 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3019 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3020 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3021 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3022 3023 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3024 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3025 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3026 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3027 if (d_nnz) { 3028 for (i=0; i<B->rmap->n; i++) { 3029 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3030 } 3031 } 3032 if (o_nnz) { 3033 for (i=0; i<B->rmap->n; i++) { 3034 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3035 } 3036 } 3037 b = (Mat_MPIAIJ*)B->data; 3038 3039 if (!B->preallocated) { 3040 /* Explicitly create 2 MATSEQAIJ matrices. */ 3041 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3042 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3043 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3044 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3045 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3046 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3047 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3048 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3049 } 3050 3051 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3052 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3053 B->preallocated = PETSC_TRUE; 3054 PetscFunctionReturn(0); 3055 } 3056 EXTERN_C_END 3057 3058 #undef __FUNCT__ 3059 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3060 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3061 { 3062 Mat mat; 3063 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3064 PetscErrorCode ierr; 3065 3066 PetscFunctionBegin; 3067 *newmat = 0; 3068 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3069 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3070 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3071 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3072 a = (Mat_MPIAIJ*)mat->data; 3073 3074 mat->factortype = matin->factortype; 3075 mat->rmap->bs = matin->rmap->bs; 3076 mat->assembled = PETSC_TRUE; 3077 mat->insertmode = NOT_SET_VALUES; 3078 mat->preallocated = PETSC_TRUE; 3079 3080 a->size = oldmat->size; 3081 a->rank = oldmat->rank; 3082 a->donotstash = oldmat->donotstash; 3083 a->roworiented = oldmat->roworiented; 3084 a->rowindices = 0; 3085 a->rowvalues = 0; 3086 a->getrowactive = PETSC_FALSE; 3087 3088 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3089 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3090 3091 if (oldmat->colmap) { 3092 #if defined (PETSC_USE_CTABLE) 3093 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3094 #else 3095 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3096 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3097 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3098 #endif 3099 } else a->colmap = 0; 3100 if (oldmat->garray) { 3101 PetscInt len; 3102 len = oldmat->B->cmap->n; 3103 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3104 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3105 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3106 } else a->garray = 0; 3107 3108 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3109 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3110 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3111 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3112 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3113 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3114 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3115 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3116 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3117 *newmat = mat; 3118 PetscFunctionReturn(0); 3119 } 3120 3121 /* 3122 Allows sending/receiving messages larger than 2 gigabytes in a single call by transferring them in chunks 3123 */ 3124 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm) 3125 { 3126 int ierr; 3127 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 3128 PetscInt i,numchunks; 3129 PetscMPIInt icnt; 3130 3131 numchunks = cnt/CHUNKSIZE + 1; 3132 for (i=0; i<numchunks; i++) { 3133 icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 3134 ierr = MPI_Send(mess,icnt,type,to,tag,comm); 3135 if (type == MPIU_INT) { 3136 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 3137 } else if (type == MPIU_SCALAR) { 3138 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 3139 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 3140 } 3141 return 0; 3142 } 3143 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm) 3144 { 3145 int ierr; 3146 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 3147 MPI_Status status; 3148 PetscInt i,numchunks; 3149 PetscMPIInt icnt; 3150 3151 numchunks = cnt/CHUNKSIZE + 1; 3152 for (i=0; i<numchunks; i++) { 3153 icnt = PetscMPIIntCast((i < numchunks-1) ?
CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 3154 ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status); 3155 if (type == MPIU_INT) { 3156 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 3157 } else if (type == MPIU_SCALAR) { 3158 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 3159 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 3160 } 3161 return 0; 3162 } 3163 3164 #undef __FUNCT__ 3165 #define __FUNCT__ "MatLoad_MPIAIJ" 3166 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3167 { 3168 PetscScalar *vals,*svals; 3169 MPI_Comm comm = ((PetscObject)viewer)->comm; 3170 PetscErrorCode ierr; 3171 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3172 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3173 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3174 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3175 PetscInt cend,cstart,n,*rowners,sizesset=1; 3176 int fd; 3177 3178 PetscFunctionBegin; 3179 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3180 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3181 if (!rank) { 3182 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3183 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3184 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3185 } 3186 3187 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3188 3189 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3190 M = header[1]; N = header[2]; 3191 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3192 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3193 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3194 3195 /* If global sizes are set, check if they are consistent with that given in the file */ 3196 if (sizesset) { 3197 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3198 } 3199 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3200 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3201 3202 /* determine ownership of all rows */ 3203 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3204 else m = newMat->rmap->n; /* Set by user */ 3205 3206 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3207 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3208 3209 /* First process needs enough room for process with most rows */ 3210 if (!rank) { 3211 mmax = rowners[1]; 3212 for (i=2; i<size; i++) { 3213 mmax = PetscMax(mmax,rowners[i]); 3214 } 3215 } else mmax = m; 3216 3217 rowners[0] = 0; 3218 for (i=2; i<=size; i++) { 3219 rowners[i] += rowners[i-1]; 3220 } 3221 rstart = rowners[rank]; 3222 rend = rowners[rank+1]; 3223 3224 /* distribute row lengths to all processors */ 3225 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3226 if (!rank) { 3227 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3228 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3229 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3230 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3231 for (j=0; 
j<m; j++) { 3232 procsnz[0] += ourlens[j]; 3233 } 3234 for (i=1; i<size; i++) { 3235 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3236 /* calculate the number of nonzeros on each processor */ 3237 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3238 procsnz[i] += rowlengths[j]; 3239 } 3240 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3241 } 3242 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3243 } else { 3244 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3245 } 3246 3247 if (!rank) { 3248 /* determine max buffer needed and allocate it */ 3249 maxnz = 0; 3250 for (i=0; i<size; i++) { 3251 maxnz = PetscMax(maxnz,procsnz[i]); 3252 } 3253 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3254 3255 /* read in my part of the matrix column indices */ 3256 nz = procsnz[0]; 3257 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3258 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3259 3260 /* read in every one elses and ship off */ 3261 for (i=1; i<size; i++) { 3262 nz = procsnz[i]; 3263 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3264 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3265 } 3266 ierr = PetscFree(cols);CHKERRQ(ierr); 3267 } else { 3268 /* determine buffer space needed for message */ 3269 nz = 0; 3270 for (i=0; i<m; i++) { 3271 nz += ourlens[i]; 3272 } 3273 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3274 3275 /* receive message of column indices*/ 3276 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3277 } 3278 3279 /* determine column ownership if matrix is not square */ 3280 if (N != M) { 3281 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3282 else n = newMat->cmap->n; 3283 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3284 cstart = cend - n; 3285 } else { 3286 cstart = rstart; 3287 cend = rend; 3288 n = cend - cstart; 3289 } 3290 3291 /* loop over local rows, determining number of off diagonal entries */ 3292 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3293 jj = 0; 3294 for (i=0; i<m; i++) { 3295 for (j=0; j<ourlens[i]; j++) { 3296 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3297 jj++; 3298 } 3299 } 3300 3301 for (i=0; i<m; i++) { 3302 ourlens[i] -= offlens[i]; 3303 } 3304 if (!sizesset) { 3305 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3306 } 3307 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3308 3309 for (i=0; i<m; i++) { 3310 ourlens[i] += offlens[i]; 3311 } 3312 3313 if (!rank) { 3314 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3315 3316 /* read in my part of the matrix numerical values */ 3317 nz = procsnz[0]; 3318 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3319 3320 /* insert into matrix */ 3321 jj = rstart; 3322 smycols = mycols; 3323 svals = vals; 3324 for (i=0; i<m; i++) { 3325 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3326 smycols += ourlens[i]; 3327 svals += ourlens[i]; 3328 jj++; 3329 } 3330 3331 /* read in other processors and ship out */ 3332 for (i=1; i<size; i++) { 3333 nz = procsnz[i]; 3334 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3335 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3336 } 3337 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3338 } else { 3339 /* receive numeric values */ 3340 ierr 
= PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3341 3342 /* receive message of values*/ 3343 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3344 3345 /* insert into matrix */ 3346 jj = rstart; 3347 smycols = mycols; 3348 svals = vals; 3349 for (i=0; i<m; i++) { 3350 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3351 smycols += ourlens[i]; 3352 svals += ourlens[i]; 3353 jj++; 3354 } 3355 } 3356 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3357 ierr = PetscFree(vals);CHKERRQ(ierr); 3358 ierr = PetscFree(mycols);CHKERRQ(ierr); 3359 ierr = PetscFree(rowners);CHKERRQ(ierr); 3360 3361 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3362 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3363 PetscFunctionReturn(0); 3364 } 3365 3366 #undef __FUNCT__ 3367 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3368 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3369 { 3370 PetscErrorCode ierr; 3371 IS iscol_local; 3372 PetscInt csize; 3373 3374 PetscFunctionBegin; 3375 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3376 if (call == MAT_REUSE_MATRIX) { 3377 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3378 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3379 } else { 3380 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3381 } 3382 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3383 if (call == MAT_INITIAL_MATRIX) { 3384 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3385 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3386 } 3387 PetscFunctionReturn(0); 3388 } 3389 3390 #undef __FUNCT__ 3391 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3392 /* 3393 Not great since it makes two copies of the submatrix, first an SeqAIJ 3394 in local and then by concatenating the local matrices the end result. 3395 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3396 3397 Note: This requires a sequential iscol with all indices. 
3398 */ 3399 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3400 { 3401 PetscErrorCode ierr; 3402 PetscMPIInt rank,size; 3403 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3404 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3405 Mat *local,M,Mreuse; 3406 MatScalar *vwork,*aa; 3407 MPI_Comm comm = ((PetscObject)mat)->comm; 3408 Mat_SeqAIJ *aij; 3409 3410 3411 PetscFunctionBegin; 3412 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3413 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3414 3415 if (call == MAT_REUSE_MATRIX) { 3416 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3417 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3418 local = &Mreuse; 3419 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3420 } else { 3421 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3422 Mreuse = *local; 3423 ierr = PetscFree(local);CHKERRQ(ierr); 3424 } 3425 3426 /* 3427 m - number of local rows 3428 n - number of columns (same on all processors) 3429 rstart - first row in new global matrix generated 3430 */ 3431 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3432 if (call == MAT_INITIAL_MATRIX) { 3433 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3434 ii = aij->i; 3435 jj = aij->j; 3436 3437 /* 3438 Determine the number of non-zeros in the diagonal and off-diagonal 3439 portions of the matrix in order to do correct preallocation 3440 */ 3441 3442 /* first get start and end of "diagonal" columns */ 3443 if (csize == PETSC_DECIDE) { 3444 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3445 if (mglobal == n) { /* square matrix */ 3446 nlocal = m; 3447 } else { 3448 nlocal = n/size + ((n % size) > rank); 3449 } 3450 } else { 3451 nlocal = csize; 3452 } 3453 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3454 rstart = rend - nlocal; 3455 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3456 3457 /* next, compute all the lengths */ 3458 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3459 olens = dlens + m; 3460 for (i=0; i<m; i++) { 3461 jend = ii[i+1] - ii[i]; 3462 olen = 0; 3463 dlen = 0; 3464 for (j=0; j<jend; j++) { 3465 if (*jj < rstart || *jj >= rend) olen++; 3466 else dlen++; 3467 jj++; 3468 } 3469 olens[i] = olen; 3470 dlens[i] = dlen; 3471 } 3472 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3473 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3474 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3475 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3476 ierr = PetscFree(dlens);CHKERRQ(ierr); 3477 } else { 3478 PetscInt ml,nl; 3479 3480 M = *newmat; 3481 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3482 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3483 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3484 /* 3485 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3486 rather than the slower MatSetValues(). 
3487 */ 3488 M->was_assembled = PETSC_TRUE; 3489 M->assembled = PETSC_FALSE; 3490 } 3491 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3492 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3493 ii = aij->i; 3494 jj = aij->j; 3495 aa = aij->a; 3496 for (i=0; i<m; i++) { 3497 row = rstart + i; 3498 nz = ii[i+1] - ii[i]; 3499 cwork = jj; jj += nz; 3500 vwork = aa; aa += nz; 3501 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3502 } 3503 3504 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3505 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3506 *newmat = M; 3507 3508 /* save submatrix used in processor for next request */ 3509 if (call == MAT_INITIAL_MATRIX) { 3510 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3511 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3512 } 3513 3514 PetscFunctionReturn(0); 3515 } 3516 3517 EXTERN_C_BEGIN 3518 #undef __FUNCT__ 3519 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3520 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3521 { 3522 PetscInt m,cstart, cend,j,nnz,i,d; 3523 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3524 const PetscInt *JJ; 3525 PetscScalar *values; 3526 PetscErrorCode ierr; 3527 3528 PetscFunctionBegin; 3529 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3530 3531 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3532 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3533 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3534 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3535 m = B->rmap->n; 3536 cstart = B->cmap->rstart; 3537 cend = B->cmap->rend; 3538 rstart = B->rmap->rstart; 3539 3540 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3541 3542 #if defined(PETSC_USE_DEBUG) 3543 for (i=0; i<m; i++) { 3544 nnz = Ii[i+1]- Ii[i]; 3545 JJ = J + Ii[i]; 3546 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3547 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3548 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3549 } 3550 #endif 3551 3552 for (i=0; i<m; i++) { 3553 nnz = Ii[i+1]- Ii[i]; 3554 JJ = J + Ii[i]; 3555 nnz_max = PetscMax(nnz_max,nnz); 3556 d = 0; 3557 for (j=0; j<nnz; j++) { 3558 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3559 } 3560 d_nnz[i] = d; 3561 o_nnz[i] = nnz - d; 3562 } 3563 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3564 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3565 3566 if (v) values = (PetscScalar*)v; 3567 else { 3568 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3569 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3570 } 3571 3572 for (i=0; i<m; i++) { 3573 ii = i + rstart; 3574 nnz = Ii[i+1]- Ii[i]; 3575 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3576 } 3577 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3578 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3579 3580 if (!v) { 3581 ierr = PetscFree(values);CHKERRQ(ierr); 3582 } 3583 PetscFunctionReturn(0); 3584 } 3585 EXTERN_C_END 3586 3587 #undef __FUNCT__ 3588 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3589 /*@ 3590 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3591 (the default parallel PETSc format). 3592 3593 Collective on MPI_Comm 3594 3595 Input Parameters: 3596 + B - the matrix 3597 . i - the indices into j for the start of each local row (starts with zero) 3598 . j - the column indices for each local row (starts with zero) 3599 - v - optional values in the matrix 3600 3601 Level: developer 3602 3603 Notes: 3604 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3605 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3606 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3607 3608 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3609 3610 The format which is used for the sparse matrix input, is equivalent to a 3611 row-major ordering.. i.e for the following matrix, the input data expected is 3612 as shown: 3613 3614 1 0 0 3615 2 0 3 P0 3616 ------- 3617 4 5 6 P1 3618 3619 Process0 [P0]: rows_owned=[0,1] 3620 i = {0,1,3} [size = nrow+1 = 2+1] 3621 j = {0,0,2} [size = nz = 6] 3622 v = {1,2,3} [size = nz = 6] 3623 3624 Process1 [P1]: rows_owned=[2] 3625 i = {0,3} [size = nrow+1 = 1+1] 3626 j = {0,1,2} [size = nz = 6] 3627 v = {4,5,6} [size = nz = 6] 3628 3629 .keywords: matrix, aij, compressed row, sparse, parallel 3630 3631 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3632 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3633 @*/ 3634 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3635 { 3636 PetscErrorCode ierr; 3637 3638 PetscFunctionBegin; 3639 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3640 PetscFunctionReturn(0); 3641 } 3642 3643 #undef __FUNCT__ 3644 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3645 /*@C 3646 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3647 (the default parallel PETSc format). For good matrix assembly performance 3648 the user should preallocate the matrix storage by setting the parameters 3649 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3650 performance can be increased by more than a factor of 50. 3651 3652 Collective on MPI_Comm 3653 3654 Input Parameters: 3655 + A - the matrix 3656 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3657 (same value is used for all local rows) 3658 . d_nnz - array containing the number of nonzeros in the various rows of the 3659 DIAGONAL portion of the local submatrix (possibly different for each row) 3660 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3661 The size of this array is equal to the number of local rows, i.e 'm'. 3662 You must leave room for the diagonal entry even if it is zero. 3663 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3664 submatrix (same value is used for all local rows). 3665 - o_nnz - array containing the number of nonzeros in the various rows of the 3666 OFF-DIAGONAL portion of the local submatrix (possibly different for 3667 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3668 structure. The size of this array is equal to the number 3669 of local rows, i.e 'm'. 3670 3671 If the *_nnz parameter is given then the *_nz parameter is ignored 3672 3673 The AIJ format (also called the Yale sparse matrix format or 3674 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3675 storage. The stored row and column indices begin with zero. 3676 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3677 3678 The parallel matrix is partitioned such that the first m0 rows belong to 3679 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3680 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3681 3682 The DIAGONAL portion of the local submatrix of a processor can be defined 3683 as the submatrix which is obtained by extracting the part corresponding to 3684 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3685 first row that belongs to the processor, r2 is the last row belonging to 3686 this processor, and c1-c2 is the range of indices of the local part of a 3687 vector suitable for applying the matrix to. This is an mxn matrix. In the 3688 common case of a square matrix, the row and column ranges are the same and 3689 the DIAGONAL part is also square. The remaining portion of the local 3690 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3691 3692 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3693 3694 You can call MatGetInfo() to get information on how effective the preallocation was; 3695 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3696 You can also run with the option -info and look for messages with the string 3697 malloc in them to see if additional memory allocation was needed. 3698 3699 Example usage: 3700 3701 Consider the following 8x8 matrix with 34 non-zero values, that is 3702 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3703 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3704 as follows: 3705 3706 .vb 3707 1 2 0 | 0 3 0 | 0 4 3708 Proc0 0 5 6 | 7 0 0 | 8 0 3709 9 0 10 | 11 0 0 | 12 0 3710 ------------------------------------- 3711 13 0 14 | 15 16 17 | 0 0 3712 Proc1 0 18 0 | 19 20 21 | 0 0 3713 0 0 0 | 22 23 0 | 24 0 3714 ------------------------------------- 3715 Proc2 25 26 27 | 0 0 28 | 29 0 3716 30 0 0 | 31 32 33 | 0 34 3717 .ve 3718 3719 This can be represented as a collection of submatrices as: 3720 3721 .vb 3722 A B C 3723 D E F 3724 G H I 3725 .ve 3726 3727 Where the submatrices A,B,C are owned by proc0, D,E,F are 3728 owned by proc1, G,H,I are owned by proc2. 3729 3730 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3731 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3732 The 'M','N' parameters are 8,8, and have the same values on all procs. 3733 3734 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3735 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3736 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3737 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3738 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3739 matrix, and [DF] as another SeqAIJ matrix. 3740 3741 When d_nz, o_nz parameters are specified, d_nz storage elements are 3742 allocated for every row of the local diagonal submatrix, and o_nz 3743 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3744 One way to choose d_nz and o_nz is to use the max nonzeros per local 3745 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3746 In this case, the values of d_nz,o_nz are: 3747 .vb 3748 proc0 : d_nz = 2, o_nz = 2 3749 proc1 : d_nz = 3, o_nz = 2 3750 proc2 : d_nz = 1, o_nz = 4 3751 .ve 3752 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3753 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3754 for proc2. i.e we are using 12+15+10=37 storage locations to store 3755 34 values. 3756 3757 When d_nnz, o_nnz parameters are specified, the storage is specified 3758 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3759 In the above case the values for d_nnz,o_nnz are: 3760 .vb 3761 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3762 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3763 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3764 .ve 3765 Here the space allocated is the sum of all the above values, i.e 34, and 3766 hence pre-allocation is perfect. 3767 3768 Level: intermediate 3769 3770 .keywords: matrix, aij, compressed row, sparse, parallel 3771 3772 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3773 MPIAIJ, MatGetInfo() 3774 @*/ 3775 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3776 { 3777 PetscErrorCode ierr; 3778 3779 PetscFunctionBegin; 3780 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3781 PetscFunctionReturn(0); 3782 } 3783 3784 #undef __FUNCT__ 3785 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3786 /*@ 3787 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain in standard 3788 CSR format the local rows. 3789 3790 Collective on MPI_Comm 3791 3792 Input Parameters: 3793 + comm - MPI communicator 3794 . m - number of local rows (Cannot be PETSC_DECIDE) 3795 . n - This value should be the same as the local size used in creating the 3796 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3797 calculated if N is given) For square matrices n is almost always m. 3798 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3799 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3800 . i - row indices 3801 . j - column indices 3802 - a - matrix values 3803 3804 Output Parameter: 3805 . mat - the matrix 3806 3807 Level: intermediate 3808 3809 Notes: 3810 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3811 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3812 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3813 3814 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3815 3816 The format which is used for the sparse matrix input is equivalent to a 3817 row-major ordering,
i.e for the following matrix, the input data expected is 3818 as shown: 3819 3820 1 0 0 3821 2 0 3 P0 3822 ------- 3823 4 5 6 P1 3824 3825 Process0 [P0]: rows_owned=[0,1] 3826 i = {0,1,3} [size = nrow+1 = 2+1] 3827 j = {0,0,2} [size = nz = 6] 3828 v = {1,2,3} [size = nz = 6] 3829 3830 Process1 [P1]: rows_owned=[2] 3831 i = {0,3} [size = nrow+1 = 1+1] 3832 j = {0,1,2} [size = nz = 6] 3833 v = {4,5,6} [size = nz = 6] 3834 3835 .keywords: matrix, aij, compressed row, sparse, parallel 3836 3837 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3838 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3839 @*/ 3840 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3841 { 3842 PetscErrorCode ierr; 3843 3844 PetscFunctionBegin; 3845 if (i[0]) { 3846 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3847 } 3848 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3849 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3850 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3851 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3852 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3853 PetscFunctionReturn(0); 3854 } 3855 3856 #undef __FUNCT__ 3857 #define __FUNCT__ "MatCreateMPIAIJ" 3858 /*@C 3859 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3860 (the default parallel PETSc format). For good matrix assembly performance 3861 the user should preallocate the matrix storage by setting the parameters 3862 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3863 performance can be increased by more than a factor of 50. 3864 3865 Collective on MPI_Comm 3866 3867 Input Parameters: 3868 + comm - MPI communicator 3869 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3870 This value should be the same as the local size used in creating the 3871 y vector for the matrix-vector product y = Ax. 3872 . n - This value should be the same as the local size used in creating the 3873 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3874 calculated if N is given) For square matrices n is almost always m. 3875 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3876 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3877 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3878 (same value is used for all local rows) 3879 . d_nnz - array containing the number of nonzeros in the various rows of the 3880 DIAGONAL portion of the local submatrix (possibly different for each row) 3881 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3882 The size of this array is equal to the number of local rows, i.e 'm'. 3883 You must leave room for the diagonal entry even if it is zero. 3884 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3885 submatrix (same value is used for all local rows). 3886 - o_nnz - array containing the number of nonzeros in the various rows of the 3887 OFF-DIAGONAL portion of the local submatrix (possibly different for 3888 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3889 structure. The size of this array is equal to the number 3890 of local rows, i.e 'm'. 
3891 3892 Output Parameter: 3893 . A - the matrix 3894 3895 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3896 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3897 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3898 3899 Notes: 3900 If the *_nnz parameter is given then the *_nz parameter is ignored 3901 3902 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3903 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3904 storage requirements for this matrix. 3905 3906 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3907 processor then it must be used on all processors that share the object for 3908 that argument. 3909 3910 The user MUST specify either the local or global matrix dimensions 3911 (possibly both). 3912 3913 The parallel matrix is partitioned across processors such that the 3914 first m0 rows belong to process 0, the next m1 rows belong to 3915 process 1, the next m2 rows belong to process 2 etc.. where 3916 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3917 values corresponding to a [m x N] submatrix. 3918 3919 The columns are logically partitioned with the n0 columns belonging 3920 to the 0th partition, the next n1 columns belonging to the next 3921 partition etc.. where n0,n1,n2... are the input parameter 'n'. 3922 3923 The DIAGONAL portion of the local submatrix on any given processor 3924 is the submatrix corresponding to the rows and columns m,n 3925 corresponding to the given processor. i.e the diagonal matrix on 3926 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1] 3927 etc. The remaining portion of the local submatrix [m x (N-n)] 3928 constitutes the OFF-DIAGONAL portion. The example below better 3929 illustrates this concept. 3930 3931 For a square global matrix we define each processor's diagonal portion 3932 to be its local rows and the corresponding columns (a square submatrix); 3933 each processor's off-diagonal portion encompasses the remainder of the 3934 local matrix (a rectangular submatrix). 3935 3936 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3937 3938 When calling this routine with a single process communicator, a matrix of 3939 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3940 type of communicator, use the construction mechanism: 3941 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3942 3943 By default, this format uses inodes (identical nodes) when possible. 3944 We search for consecutive rows with the same nonzero structure, thereby 3945 reusing matrix information to achieve increased efficiency. 3946 3947 Options Database Keys: 3948 + -mat_no_inode - Do not use inodes 3949 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3950 - -mat_aij_oneindex - Internally use indexing starting at 1 3951 rather than 0. Note that when calling MatSetValues(), 3952 the user still MUST index entries starting at 0! 3953 3954 3955 Example usage: 3956 3957 Consider the following 8x8 matrix with 34 non-zero values, that is 3958 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3959 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 3960 as follows: 3961 3962 .vb 3963 1 2 0 | 0 3 0 | 0 4 3964 Proc0 0 5 6 | 7 0 0 | 8 0 3965 9 0 10 | 11 0 0 | 12 0 3966 ------------------------------------- 3967 13 0 14 | 15 16 17 | 0 0 3968 Proc1 0 18 0 | 19 20 21 | 0 0 3969 0 0 0 | 22 23 0 | 24 0 3970 ------------------------------------- 3971 Proc2 25 26 27 | 0 0 28 | 29 0 3972 30 0 0 | 31 32 33 | 0 34 3973 .ve 3974 3975 This can be represented as a collection of submatrices as: 3976 3977 .vb 3978 A B C 3979 D E F 3980 G H I 3981 .ve 3982 3983 Where the submatrices A,B,C are owned by proc0, D,E,F are 3984 owned by proc1, G,H,I are owned by proc2. 3985 3986 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3987 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3988 The 'M','N' parameters are 8,8, and have the same values on all procs. 3989 3990 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3991 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3992 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3993 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3994 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3995 matrix, and [DF] as another SeqAIJ matrix. 3996 3997 When d_nz, o_nz parameters are specified, d_nz storage elements are 3998 allocated for every row of the local diagonal submatrix, and o_nz 3999 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4000 One way to choose d_nz and o_nz is to use the max nonzeros per local 4001 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4002 In this case, the values of d_nz,o_nz are: 4003 .vb 4004 proc0 : d_nz = 2, o_nz = 2 4005 proc1 : d_nz = 3, o_nz = 2 4006 proc2 : d_nz = 1, o_nz = 4 4007 .ve 4008 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4009 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4010 for proc2. i.e we are using 12+15+10=37 storage locations to store 4011 34 values. 4012 4013 When d_nnz, o_nnz parameters are specified, the storage is specified 4014 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4015 In the above case the values for d_nnz,o_nnz are: 4016 .vb 4017 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4018 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4019 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4020 .ve 4021 Here the space allocated is the sum of all the above values, i.e 34, and 4022 hence pre-allocation is perfect.
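   As a minimal sketch, the call for proc1 of the example above (assuming a communicator comm and
   omitting error checking) would look like:

.vb
     Mat      A;
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   or, using the recommended paradigm, MatCreate(comm,&A); MatSetSizes(A,3,3,8,8);
   MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz); followed in either case
   by MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd().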
4023 4024 Level: intermediate 4025 4026 .keywords: matrix, aij, compressed row, sparse, parallel 4027 4028 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4029 MPIAIJ, MatCreateMPIAIJWithArrays() 4030 @*/ 4031 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4032 { 4033 PetscErrorCode ierr; 4034 PetscMPIInt size; 4035 4036 PetscFunctionBegin; 4037 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4038 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4039 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4040 if (size > 1) { 4041 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4042 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4043 } else { 4044 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4045 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4046 } 4047 PetscFunctionReturn(0); 4048 } 4049 4050 #undef __FUNCT__ 4051 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4052 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4053 { 4054 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4055 4056 PetscFunctionBegin; 4057 *Ad = a->A; 4058 *Ao = a->B; 4059 *colmap = a->garray; 4060 PetscFunctionReturn(0); 4061 } 4062 4063 #undef __FUNCT__ 4064 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4065 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4066 { 4067 PetscErrorCode ierr; 4068 PetscInt i; 4069 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4070 4071 PetscFunctionBegin; 4072 if (coloring->ctype == IS_COLORING_GLOBAL) { 4073 ISColoringValue *allcolors,*colors; 4074 ISColoring ocoloring; 4075 4076 /* set coloring for diagonal portion */ 4077 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4078 4079 /* set coloring for off-diagonal portion */ 4080 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4081 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4082 for (i=0; i<a->B->cmap->n; i++) { 4083 colors[i] = allcolors[a->garray[i]]; 4084 } 4085 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4086 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4087 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4088 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4089 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4090 ISColoringValue *colors; 4091 PetscInt *larray; 4092 ISColoring ocoloring; 4093 4094 /* set coloring for diagonal portion */ 4095 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4096 for (i=0; i<a->A->cmap->n; i++) { 4097 larray[i] = i + A->cmap->rstart; 4098 } 4099 ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4100 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4101 for (i=0; i<a->A->cmap->n; i++) { 4102 colors[i] = coloring->colors[larray[i]]; 4103 } 4104 ierr = PetscFree(larray);CHKERRQ(ierr); 4105 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4106 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4107 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4108 4109 /* set coloring for off-diagonal portion */ 4110 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4111 ierr = 
ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4112 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4113 for (i=0; i<a->B->cmap->n; i++) { 4114 colors[i] = coloring->colors[larray[i]]; 4115 } 4116 ierr = PetscFree(larray);CHKERRQ(ierr); 4117 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4118 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4119 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4120 } else { 4121 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4122 } 4123 4124 PetscFunctionReturn(0); 4125 } 4126 4127 #if defined(PETSC_HAVE_ADIC) 4128 #undef __FUNCT__ 4129 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4130 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4131 { 4132 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4133 PetscErrorCode ierr; 4134 4135 PetscFunctionBegin; 4136 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4137 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4138 PetscFunctionReturn(0); 4139 } 4140 #endif 4141 4142 #undef __FUNCT__ 4143 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4144 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4145 { 4146 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4147 PetscErrorCode ierr; 4148 4149 PetscFunctionBegin; 4150 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4151 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4152 PetscFunctionReturn(0); 4153 } 4154 4155 #undef __FUNCT__ 4156 #define __FUNCT__ "MatMerge" 4157 /*@ 4158 MatMerge - Creates a single large PETSc matrix by concatenating sequential 4159 matrices from each processor 4160 4161 Collective on MPI_Comm 4162 4163 Input Parameters: 4164 + comm - the communicator the parallel matrix will live on 4165 . inmat - the input sequential matrix (one per process) 4166 . n - number of local columns (or PETSC_DECIDE) 4167 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4168 4169 Output Parameter: 4170 . outmat - the parallel matrix generated 4171 4172 Level: advanced 4173 4174 Notes: The number of columns of the matrix in EACH processor MUST be the same.
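   A minimal usage sketch (the names localmat, globalmat, mlocal, N and nnz are placeholders;
   error checking omitted):

.vb
     Mat localmat,globalmat;
     MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,nnz,&localmat);
     ... insert entries with MatSetValues() and assemble localmat ...
     MatMerge(comm,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalmat);
.ve
   Note that inmat is destroyed by this routine once its values have been copied into outmat.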
4175 4176 @*/ 4177 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4178 { 4179 PetscErrorCode ierr; 4180 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 4181 PetscInt *indx; 4182 PetscScalar *values; 4183 4184 PetscFunctionBegin; 4185 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4186 if (scall == MAT_INITIAL_MATRIX){ 4187 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 4188 if (n == PETSC_DECIDE){ 4189 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4190 } 4191 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4192 rstart -= m; 4193 4194 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4195 for (i=0;i<m;i++) { 4196 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4197 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4198 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4199 } 4200 /* This routine will ONLY return MPIAIJ type matrix */ 4201 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4202 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4203 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4204 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4205 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4206 4207 } else if (scall == MAT_REUSE_MATRIX){ 4208 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4209 } else { 4210 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4211 } 4212 4213 for (i=0;i<m;i++) { 4214 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4215 Ii = i + rstart; 4216 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4217 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4218 } 4219 ierr = MatDestroy(inmat);CHKERRQ(ierr); 4220 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4221 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4222 4223 PetscFunctionReturn(0); 4224 } 4225 4226 #undef __FUNCT__ 4227 #define __FUNCT__ "MatFileSplit" 4228 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4229 { 4230 PetscErrorCode ierr; 4231 PetscMPIInt rank; 4232 PetscInt m,N,i,rstart,nnz; 4233 size_t len; 4234 const PetscInt *indx; 4235 PetscViewer out; 4236 char *name; 4237 Mat B; 4238 const PetscScalar *values; 4239 4240 PetscFunctionBegin; 4241 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4242 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4243 /* Should this be the type of the diagonal block of A? 
*/ 4244 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4245 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4246 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4247 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4248 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4249 for (i=0;i<m;i++) { 4250 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4251 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4252 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4253 } 4254 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4255 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4256 4257 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4258 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4259 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4260 sprintf(name,"%s.%d",outfile,rank); 4261 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4262 ierr = PetscFree(name); 4263 ierr = MatView(B,out);CHKERRQ(ierr); 4264 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 4265 ierr = MatDestroy(B);CHKERRQ(ierr); 4266 PetscFunctionReturn(0); 4267 } 4268 4269 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4270 #undef __FUNCT__ 4271 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4272 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4273 { 4274 PetscErrorCode ierr; 4275 Mat_Merge_SeqsToMPI *merge; 4276 PetscContainer container; 4277 4278 PetscFunctionBegin; 4279 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4280 if (container) { 4281 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4282 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4283 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4284 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4285 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4286 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4287 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4288 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4289 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4290 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4291 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4292 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4293 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4294 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4295 4296 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4297 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4298 } 4299 ierr = PetscFree(merge);CHKERRQ(ierr); 4300 4301 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4302 PetscFunctionReturn(0); 4303 } 4304 4305 #include "../src/mat/utils/freespace.h" 4306 #include "petscbt.h" 4307 4308 #undef __FUNCT__ 4309 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4310 /*@C 4311 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4312 matrices from each processor 4313 4314 Collective on MPI_Comm 4315 4316 Input Parameters: 4317 + comm - the communicators the parallel matrix will live on 4318 . seqmat - the input sequential matrices 4319 . m - number of local rows (or PETSC_DECIDE) 4320 . n - number of local columns (or PETSC_DECIDE) 4321 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4322 4323 Output Parameter: 4324 . mpimat - the parallel matrix generated 4325 4326 Level: advanced 4327 4328 Notes: 4329 The dimensions of the sequential matrix in each processor MUST be the same. 
4330 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4331 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4332 @*/ 4333 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4334 { 4335 PetscErrorCode ierr; 4336 MPI_Comm comm=((PetscObject)mpimat)->comm; 4337 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4338 PetscMPIInt size,rank,taga,*len_s; 4339 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4340 PetscInt proc,m; 4341 PetscInt **buf_ri,**buf_rj; 4342 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4343 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4344 MPI_Request *s_waits,*r_waits; 4345 MPI_Status *status; 4346 MatScalar *aa=a->a; 4347 MatScalar **abuf_r,*ba_i; 4348 Mat_Merge_SeqsToMPI *merge; 4349 PetscContainer container; 4350 4351 PetscFunctionBegin; 4352 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4353 4354 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4355 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4356 4357 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4358 if (container) { 4359 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4360 } 4361 bi = merge->bi; 4362 bj = merge->bj; 4363 buf_ri = merge->buf_ri; 4364 buf_rj = merge->buf_rj; 4365 4366 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4367 owners = merge->rowmap->range; 4368 len_s = merge->len_s; 4369 4370 /* send and recv matrix values */ 4371 /*-----------------------------*/ 4372 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4373 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4374 4375 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4376 for (proc=0,k=0; proc<size; proc++){ 4377 if (!len_s[proc]) continue; 4378 i = owners[proc]; 4379 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4380 k++; 4381 } 4382 4383 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4384 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4385 ierr = PetscFree(status);CHKERRQ(ierr); 4386 4387 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4388 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4389 4390 /* insert mat values of mpimat */ 4391 /*----------------------------*/ 4392 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4393 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4394 4395 for (k=0; k<merge->nrecv; k++){ 4396 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4397 nrows = *(buf_ri_k[k]); 4398 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4399 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4400 } 4401 4402 /* set values of ba */ 4403 m = merge->rowmap->n; 4404 for (i=0; i<m; i++) { 4405 arow = owners[rank] + i; 4406 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4407 bnzi = bi[i+1] - bi[i]; 4408 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4409 4410 /* add local non-zero vals of this proc's seqmat into ba */ 4411 anzi = ai[arow+1] - ai[arow]; 4412 aj = a->j + ai[arow]; 4413 aa = a->a + ai[arow]; 4414 nextaj = 0; 4415 for (j=0; nextaj<anzi; j++){ 4416 if (*(bj_i + j) == 
aj[nextaj]){ /* bcol == acol */ 4417 ba_i[j] += aa[nextaj++]; 4418 } 4419 } 4420 4421 /* add received vals into ba */ 4422 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4423 /* i-th row */ 4424 if (i == *nextrow[k]) { 4425 anzi = *(nextai[k]+1) - *nextai[k]; 4426 aj = buf_rj[k] + *(nextai[k]); 4427 aa = abuf_r[k] + *(nextai[k]); 4428 nextaj = 0; 4429 for (j=0; nextaj<anzi; j++){ 4430 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4431 ba_i[j] += aa[nextaj++]; 4432 } 4433 } 4434 nextrow[k]++; nextai[k]++; 4435 } 4436 } 4437 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4438 } 4439 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4440 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4441 4442 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4443 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4444 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4445 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4446 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4447 PetscFunctionReturn(0); 4448 } 4449 4450 #undef __FUNCT__ 4451 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4452 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4453 { 4454 PetscErrorCode ierr; 4455 Mat B_mpi; 4456 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4457 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4458 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4459 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4460 PetscInt len,proc,*dnz,*onz; 4461 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4462 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4463 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4464 MPI_Status *status; 4465 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4466 PetscBT lnkbt; 4467 Mat_Merge_SeqsToMPI *merge; 4468 PetscContainer container; 4469 4470 PetscFunctionBegin; 4471 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4472 4473 /* make sure it is a PETSc comm */ 4474 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4475 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4476 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4477 4478 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4479 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4480 4481 /* determine row ownership */ 4482 /*---------------------------------------------------------*/ 4483 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4484 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4485 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4486 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4487 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4488 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4489 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4490 4491 m = merge->rowmap->n; 4492 M = merge->rowmap->N; 4493 owners = merge->rowmap->range; 4494 4495 /* determine the number of messages to send, their lengths */ 4496 /*---------------------------------------------------------*/ 4497 len_s = merge->len_s; 4498 4499 len = 0; /* length of buf_si[] */ 4500 merge->nsend = 0; 4501 for (proc=0; proc<size; proc++){ 4502 len_si[proc] = 0; 4503 if (proc == rank){ 4504 len_s[proc] = 0; 4505 } else { 4506 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4507 len_s[proc] = ai[owners[proc+1]] - 
ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4508 } 4509 if (len_s[proc]) { 4510 merge->nsend++; 4511 nrows = 0; 4512 for (i=owners[proc]; i<owners[proc+1]; i++){ 4513 if (ai[i+1] > ai[i]) nrows++; 4514 } 4515 len_si[proc] = 2*(nrows+1); 4516 len += len_si[proc]; 4517 } 4518 } 4519 4520 /* determine the number and length of messages to receive for ij-structure */ 4521 /*-------------------------------------------------------------------------*/ 4522 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4523 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4524 4525 /* post the Irecv of j-structure */ 4526 /*-------------------------------*/ 4527 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4528 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4529 4530 /* post the Isend of j-structure */ 4531 /*--------------------------------*/ 4532 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4533 4534 for (proc=0, k=0; proc<size; proc++){ 4535 if (!len_s[proc]) continue; 4536 i = owners[proc]; 4537 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4538 k++; 4539 } 4540 4541 /* receives and sends of j-structure are complete */ 4542 /*------------------------------------------------*/ 4543 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4544 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4545 4546 /* send and recv i-structure */ 4547 /*---------------------------*/ 4548 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4549 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4550 4551 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4552 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4553 for (proc=0,k=0; proc<size; proc++){ 4554 if (!len_s[proc]) continue; 4555 /* form outgoing message for i-structure: 4556 buf_si[0]: nrows to be sent 4557 [1:nrows]: row index (global) 4558 [nrows+1:2*nrows+1]: i-structure index 4559 */ 4560 /*-------------------------------------------*/ 4561 nrows = len_si[proc]/2 - 1; 4562 buf_si_i = buf_si + nrows+1; 4563 buf_si[0] = nrows; 4564 buf_si_i[0] = 0; 4565 nrows = 0; 4566 for (i=owners[proc]; i<owners[proc+1]; i++){ 4567 anzi = ai[i+1] - ai[i]; 4568 if (anzi) { 4569 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4570 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4571 nrows++; 4572 } 4573 } 4574 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4575 k++; 4576 buf_si += len_si[proc]; 4577 } 4578 4579 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4580 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4581 4582 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4583 for (i=0; i<merge->nrecv; i++){ 4584 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4585 } 4586 4587 ierr = PetscFree(len_si);CHKERRQ(ierr); 4588 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4589 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4590 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4591 ierr = 
PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = 0;
  len = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is
not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4678 B_mpi->assembled = PETSC_FALSE; 4679 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4680 merge->bi = bi; 4681 merge->bj = bj; 4682 merge->buf_ri = buf_ri; 4683 merge->buf_rj = buf_rj; 4684 merge->coi = PETSC_NULL; 4685 merge->coj = PETSC_NULL; 4686 merge->owners_co = PETSC_NULL; 4687 4688 /* attach the supporting struct to B_mpi for reuse */ 4689 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4690 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4691 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4692 *mpimat = B_mpi; 4693 4694 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4695 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4696 PetscFunctionReturn(0); 4697 } 4698 4699 #undef __FUNCT__ 4700 #define __FUNCT__ "MatMerge_SeqsToMPI" 4701 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4702 { 4703 PetscErrorCode ierr; 4704 4705 PetscFunctionBegin; 4706 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4707 if (scall == MAT_INITIAL_MATRIX){ 4708 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4709 } 4710 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4711 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4712 PetscFunctionReturn(0); 4713 } 4714 4715 #undef __FUNCT__ 4716 #define __FUNCT__ "MatGetLocalMat" 4717 /*@ 4718 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4719 4720 Not Collective 4721 4722 Input Parameters: 4723 + A - the matrix 4724 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4725 4726 Output Parameter: 4727 . 
A_loc - the local sequential matrix generated 4728 4729 Level: developer 4730 4731 @*/ 4732 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4733 { 4734 PetscErrorCode ierr; 4735 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4736 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4737 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4738 MatScalar *aa=a->a,*ba=b->a,*cam; 4739 PetscScalar *ca; 4740 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4741 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4742 4743 PetscFunctionBegin; 4744 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4745 if (scall == MAT_INITIAL_MATRIX){ 4746 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4747 ci[0] = 0; 4748 for (i=0; i<am; i++){ 4749 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4750 } 4751 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4752 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4753 k = 0; 4754 for (i=0; i<am; i++) { 4755 ncols_o = bi[i+1] - bi[i]; 4756 ncols_d = ai[i+1] - ai[i]; 4757 /* off-diagonal portion of A */ 4758 for (jo=0; jo<ncols_o; jo++) { 4759 col = cmap[*bj]; 4760 if (col >= cstart) break; 4761 cj[k] = col; bj++; 4762 ca[k++] = *ba++; 4763 } 4764 /* diagonal portion of A */ 4765 for (j=0; j<ncols_d; j++) { 4766 cj[k] = cstart + *aj++; 4767 ca[k++] = *aa++; 4768 } 4769 /* off-diagonal portion of A */ 4770 for (j=jo; j<ncols_o; j++) { 4771 cj[k] = cmap[*bj++]; 4772 ca[k++] = *ba++; 4773 } 4774 } 4775 /* put together the new matrix */ 4776 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4777 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4778 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4779 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4780 mat->free_a = PETSC_TRUE; 4781 mat->free_ij = PETSC_TRUE; 4782 mat->nonew = 0; 4783 } else if (scall == MAT_REUSE_MATRIX){ 4784 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4785 ci = mat->i; cj = mat->j; cam = mat->a; 4786 for (i=0; i<am; i++) { 4787 /* off-diagonal portion of A */ 4788 ncols_o = bi[i+1] - bi[i]; 4789 for (jo=0; jo<ncols_o; jo++) { 4790 col = cmap[*bj]; 4791 if (col >= cstart) break; 4792 *cam++ = *ba++; bj++; 4793 } 4794 /* diagonal portion of A */ 4795 ncols_d = ai[i+1] - ai[i]; 4796 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4797 /* off-diagonal portion of A */ 4798 for (j=jo; j<ncols_o; j++) { 4799 *cam++ = *ba++; bj++; 4800 } 4801 } 4802 } else { 4803 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4804 } 4805 4806 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4807 PetscFunctionReturn(0); 4808 } 4809 4810 #undef __FUNCT__ 4811 #define __FUNCT__ "MatGetLocalMatCondensed" 4812 /*@C 4813 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4814 4815 Not Collective 4816 4817 Input Parameters: 4818 + A - the matrix 4819 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4820 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4821 4822 Output Parameter: 4823 . 
A_loc - the local sequential matrix generated 4824 4825 Level: developer 4826 4827 @*/ 4828 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4829 { 4830 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4831 PetscErrorCode ierr; 4832 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4833 IS isrowa,iscola; 4834 Mat *aloc; 4835 4836 PetscFunctionBegin; 4837 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4838 if (!row){ 4839 start = A->rmap->rstart; end = A->rmap->rend; 4840 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4841 } else { 4842 isrowa = *row; 4843 } 4844 if (!col){ 4845 start = A->cmap->rstart; 4846 cmap = a->garray; 4847 nzA = a->A->cmap->n; 4848 nzB = a->B->cmap->n; 4849 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4850 ncols = 0; 4851 for (i=0; i<nzB; i++) { 4852 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4853 else break; 4854 } 4855 imark = i; 4856 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4857 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4858 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4859 } else { 4860 iscola = *col; 4861 } 4862 if (scall != MAT_INITIAL_MATRIX){ 4863 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4864 aloc[0] = *A_loc; 4865 } 4866 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4867 *A_loc = aloc[0]; 4868 ierr = PetscFree(aloc);CHKERRQ(ierr); 4869 if (!row){ 4870 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4871 } 4872 if (!col){ 4873 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4874 } 4875 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4876 PetscFunctionReturn(0); 4877 } 4878 4879 #undef __FUNCT__ 4880 #define __FUNCT__ "MatGetBrowsOfAcols" 4881 /*@C 4882 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4883 4884 Collective on Mat 4885 4886 Input Parameters: 4887 + A,B - the matrices in mpiaij format 4888 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4889 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4890 4891 Output Parameter: 4892 + rowb, colb - index sets of rows and columns of B to extract 4893 . 
brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4894 - B_seq - the sequential matrix generated 4895 4896 Level: developer 4897 4898 @*/ 4899 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4900 { 4901 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4902 PetscErrorCode ierr; 4903 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4904 IS isrowb,iscolb; 4905 Mat *bseq; 4906 4907 PetscFunctionBegin; 4908 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4909 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4910 } 4911 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4912 4913 if (scall == MAT_INITIAL_MATRIX){ 4914 start = A->cmap->rstart; 4915 cmap = a->garray; 4916 nzA = a->A->cmap->n; 4917 nzB = a->B->cmap->n; 4918 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4919 ncols = 0; 4920 for (i=0; i<nzB; i++) { /* row < local row index */ 4921 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4922 else break; 4923 } 4924 imark = i; 4925 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4926 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4927 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4928 *brstart = imark; 4929 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4930 } else { 4931 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4932 isrowb = *rowb; iscolb = *colb; 4933 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4934 bseq[0] = *B_seq; 4935 } 4936 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4937 *B_seq = bseq[0]; 4938 ierr = PetscFree(bseq);CHKERRQ(ierr); 4939 if (!rowb){ 4940 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4941 } else { 4942 *rowb = isrowb; 4943 } 4944 if (!colb){ 4945 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4946 } else { 4947 *colb = iscolb; 4948 } 4949 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4950 PetscFunctionReturn(0); 4951 } 4952 4953 #undef __FUNCT__ 4954 #define __FUNCT__ "MatGetBrowsOfAoCols" 4955 /*@C 4956 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4957 of the OFF-DIAGONAL portion of local A 4958 4959 Collective on Mat 4960 4961 Input Parameters: 4962 + A,B - the matrices in mpiaij format 4963 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4964 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4965 . 
startsj_r - similar to startsj for receives 4966 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4967 4968 Output Parameter: 4969 + B_oth - the sequential matrix generated 4970 4971 Level: developer 4972 4973 @*/ 4974 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4975 { 4976 VecScatter_MPI_General *gen_to,*gen_from; 4977 PetscErrorCode ierr; 4978 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4979 Mat_SeqAIJ *b_oth; 4980 VecScatter ctx=a->Mvctx; 4981 MPI_Comm comm=((PetscObject)ctx)->comm; 4982 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4983 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4984 PetscScalar *rvalues,*svalues; 4985 MatScalar *b_otha,*bufa,*bufA; 4986 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4987 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4988 MPI_Status *sstatus,rstatus; 4989 PetscMPIInt jj; 4990 PetscInt *cols,sbs,rbs; 4991 PetscScalar *vals; 4992 4993 PetscFunctionBegin; 4994 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4995 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4996 } 4997 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4998 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4999 5000 gen_to = (VecScatter_MPI_General*)ctx->todata; 5001 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5002 rvalues = gen_from->values; /* holds the length of receiving row */ 5003 svalues = gen_to->values; /* holds the length of sending row */ 5004 nrecvs = gen_from->n; 5005 nsends = gen_to->n; 5006 5007 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5008 srow = gen_to->indices; /* local row index to be sent */ 5009 sstarts = gen_to->starts; 5010 sprocs = gen_to->procs; 5011 sstatus = gen_to->sstatus; 5012 sbs = gen_to->bs; 5013 rstarts = gen_from->starts; 5014 rprocs = gen_from->procs; 5015 rbs = gen_from->bs; 5016 5017 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5018 if (scall == MAT_INITIAL_MATRIX){ 5019 /* i-array */ 5020 /*---------*/ 5021 /* post receives */ 5022 for (i=0; i<nrecvs; i++){ 5023 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5024 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5025 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5026 } 5027 5028 /* pack the outgoing message */ 5029 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5030 sstartsj[0] = 0; rstartsj[0] = 0; 5031 len = 0; /* total length of j or a array to be sent */ 5032 k = 0; 5033 for (i=0; i<nsends; i++){ 5034 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5035 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5036 for (j=0; j<nrows; j++) { 5037 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5038 for (l=0; l<sbs; l++){ 5039 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5040 rowlen[j*sbs+l] = ncols; 5041 len += ncols; 5042 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5043 } 5044 k++; 5045 } 5046 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5047 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and 
bufa */ 5048 } 5049 /* recvs and sends of i-array are completed */ 5050 i = nrecvs; 5051 while (i--) { 5052 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5053 } 5054 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5055 5056 /* allocate buffers for sending j and a arrays */ 5057 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5058 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5059 5060 /* create i-array of B_oth */ 5061 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5062 b_othi[0] = 0; 5063 len = 0; /* total length of j or a array to be received */ 5064 k = 0; 5065 for (i=0; i<nrecvs; i++){ 5066 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5067 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5068 for (j=0; j<nrows; j++) { 5069 b_othi[k+1] = b_othi[k] + rowlen[j]; 5070 len += rowlen[j]; k++; 5071 } 5072 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5073 } 5074 5075 /* allocate space for j and a arrrays of B_oth */ 5076 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5077 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5078 5079 /* j-array */ 5080 /*---------*/ 5081 /* post receives of j-array */ 5082 for (i=0; i<nrecvs; i++){ 5083 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5084 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5085 } 5086 5087 /* pack the outgoing message j-array */ 5088 k = 0; 5089 for (i=0; i<nsends; i++){ 5090 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5091 bufJ = bufj+sstartsj[i]; 5092 for (j=0; j<nrows; j++) { 5093 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5094 for (ll=0; ll<sbs; ll++){ 5095 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5096 for (l=0; l<ncols; l++){ 5097 *bufJ++ = cols[l]; 5098 } 5099 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5100 } 5101 } 5102 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5103 } 5104 5105 /* recvs and sends of j-array are completed */ 5106 i = nrecvs; 5107 while (i--) { 5108 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5109 } 5110 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5111 } else if (scall == MAT_REUSE_MATRIX){ 5112 sstartsj = *startsj; 5113 rstartsj = *startsj_r; 5114 bufa = *bufa_ptr; 5115 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5116 b_otha = b_oth->a; 5117 } else { 5118 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5119 } 5120 5121 /* a-array */ 5122 /*---------*/ 5123 /* post receives of a-array */ 5124 for (i=0; i<nrecvs; i++){ 5125 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5126 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5127 } 5128 5129 /* pack the outgoing message a-array */ 5130 k = 0; 5131 for (i=0; i<nsends; i++){ 5132 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5133 bufA = bufa+sstartsj[i]; 5134 for (j=0; j<nrows; j++) { 5135 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5136 for (ll=0; ll<sbs; ll++){ 5137 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5138 for (l=0; l<ncols; l++){ 5139 *bufA++ = vals[l]; 5140 } 5141 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5142 } 5143 } 5144 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5145 } 5146 /* recvs and sends of a-array are completed */ 5147 i = nrecvs; 5148 while (i--) { 5149 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5150 } 5151 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5152 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5153 5154 if (scall == MAT_INITIAL_MATRIX){ 5155 /* put together the new matrix */ 5156 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5157 5158 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5159 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5160 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5161 b_oth->free_a = PETSC_TRUE; 5162 b_oth->free_ij = PETSC_TRUE; 5163 b_oth->nonew = 0; 5164 5165 ierr = PetscFree(bufj);CHKERRQ(ierr); 5166 if (!startsj || !bufa_ptr){ 5167 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5168 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5169 } else { 5170 *startsj = sstartsj; 5171 *startsj_r = rstartsj; 5172 *bufa_ptr = bufa; 5173 } 5174 } 5175 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5176 PetscFunctionReturn(0); 5177 } 5178 5179 #undef __FUNCT__ 5180 #define __FUNCT__ "MatGetCommunicationStructs" 5181 /*@C 5182 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5183 5184 Not Collective 5185 5186 Input Parameters: 5187 . A - The matrix in mpiaij format 5188 5189 Output Parameter: 5190 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5191 . 
colmap - A map from global column index to local index into lvec 5192 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5193 5194 Level: developer 5195 5196 @*/ 5197 #if defined (PETSC_USE_CTABLE) 5198 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5199 #else 5200 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5201 #endif 5202 { 5203 Mat_MPIAIJ *a; 5204 5205 PetscFunctionBegin; 5206 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5207 PetscValidPointer(lvec, 2); 5208 PetscValidPointer(colmap, 3); 5209 PetscValidPointer(multScatter, 4); 5210 a = (Mat_MPIAIJ *) A->data; 5211 if (lvec) *lvec = a->lvec; 5212 if (colmap) *colmap = a->colmap; 5213 if (multScatter) *multScatter = a->Mvctx; 5214 PetscFunctionReturn(0); 5215 } 5216 5217 EXTERN_C_BEGIN 5218 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5219 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5220 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5221 EXTERN_C_END 5222 5223 #undef __FUNCT__ 5224 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5225 /* 5226 Computes (B'*A')' since computing B*A directly is untenable 5227 5228 n p p 5229 ( ) ( ) ( ) 5230 m ( A ) * n ( B ) = m ( C ) 5231 ( ) ( ) ( ) 5232 5233 */ 5234 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5235 { 5236 PetscErrorCode ierr; 5237 Mat At,Bt,Ct; 5238 5239 PetscFunctionBegin; 5240 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5241 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5242 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5243 ierr = MatDestroy(At);CHKERRQ(ierr); 5244 ierr = MatDestroy(Bt);CHKERRQ(ierr); 5245 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5246 ierr = MatDestroy(Ct);CHKERRQ(ierr); 5247 PetscFunctionReturn(0); 5248 } 5249 5250 #undef __FUNCT__ 5251 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5252 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5253 { 5254 PetscErrorCode ierr; 5255 PetscInt m=A->rmap->n,n=B->cmap->n; 5256 Mat Cmat; 5257 5258 PetscFunctionBegin; 5259 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5260 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5261 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5262 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5263 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5264 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5265 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5266 *C = Cmat; 5267 PetscFunctionReturn(0); 5268 } 5269 5270 /* ----------------------------------------------------------------*/ 5271 #undef __FUNCT__ 5272 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5273 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5274 { 5275 PetscErrorCode ierr; 5276 5277 PetscFunctionBegin; 5278 if (scall == MAT_INITIAL_MATRIX){ 5279 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5280 } 5281 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5282 PetscFunctionReturn(0); 5283 } 5284 5285 EXTERN_C_BEGIN 5286 #if defined(PETSC_HAVE_MUMPS) 5287 extern PetscErrorCode 
MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5288 #endif 5289 #if defined(PETSC_HAVE_PASTIX) 5290 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5291 #endif 5292 #if defined(PETSC_HAVE_SUPERLU_DIST) 5293 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5294 #endif 5295 #if defined(PETSC_HAVE_SPOOLES) 5296 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5297 #endif 5298 EXTERN_C_END 5299 5300 /*MC 5301 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5302 5303 Options Database Keys: 5304 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5305 5306 Level: beginner 5307 5308 .seealso: MatCreateMPIAIJ() 5309 M*/ 5310 5311 EXTERN_C_BEGIN 5312 #undef __FUNCT__ 5313 #define __FUNCT__ "MatCreate_MPIAIJ" 5314 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5315 { 5316 Mat_MPIAIJ *b; 5317 PetscErrorCode ierr; 5318 PetscMPIInt size; 5319 5320 PetscFunctionBegin; 5321 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5322 5323 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5324 B->data = (void*)b; 5325 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5326 B->rmap->bs = 1; 5327 B->assembled = PETSC_FALSE; 5328 5329 B->insertmode = NOT_SET_VALUES; 5330 b->size = size; 5331 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5332 5333 /* build cache for off array entries formed */ 5334 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5335 b->donotstash = PETSC_FALSE; 5336 b->colmap = 0; 5337 b->garray = 0; 5338 b->roworiented = PETSC_TRUE; 5339 5340 /* stuff used for matrix vector multiply */ 5341 b->lvec = PETSC_NULL; 5342 b->Mvctx = PETSC_NULL; 5343 5344 /* stuff for MatGetRow() */ 5345 b->rowindices = 0; 5346 b->rowvalues = 0; 5347 b->getrowactive = PETSC_FALSE; 5348 5349 #if defined(PETSC_HAVE_SPOOLES) 5350 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5351 "MatGetFactor_mpiaij_spooles", 5352 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5353 #endif 5354 #if defined(PETSC_HAVE_MUMPS) 5355 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5356 "MatGetFactor_aij_mumps", 5357 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5358 #endif 5359 #if defined(PETSC_HAVE_PASTIX) 5360 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5361 "MatGetFactor_mpiaij_pastix", 5362 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5363 #endif 5364 #if defined(PETSC_HAVE_SUPERLU_DIST) 5365 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5366 "MatGetFactor_mpiaij_superlu_dist", 5367 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5368 #endif 5369 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5370 "MatStoreValues_MPIAIJ", 5371 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5372 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5373 "MatRetrieveValues_MPIAIJ", 5374 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5375 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5376 "MatGetDiagonalBlock_MPIAIJ", 5377 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5378 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5379 "MatIsTranspose_MPIAIJ", 5380 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5381 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5382 "MatMPIAIJSetPreallocation_MPIAIJ", 5383 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5384 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5385 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5386 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5387 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5388 "MatDiagonalScaleLocal_MPIAIJ", 5389 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5390 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5391 "MatConvert_MPIAIJ_MPIAIJPERM", 5392 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5393 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5394 "MatConvert_MPIAIJ_MPIAIJCRL", 5395 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5396 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5397 "MatConvert_MPIAIJ_MPISBAIJ", 5398 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5399 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5400 "MatMatMult_MPIDense_MPIAIJ", 5401 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5402 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5403 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5404 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5405 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5406 "MatMatMultNumeric_MPIDense_MPIAIJ", 5407 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5408 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5409 PetscFunctionReturn(0); 5410 } 5411 EXTERN_C_END 5412 5413 #undef __FUNCT__ 5414 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5415 /*@ 5416 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5417 and "off-diagonal" part of the matrix in CSR format. 5418 5419 Collective on MPI_Comm 5420 5421 Input Parameters: 5422 + comm - MPI communicator 5423 . m - number of local rows (Cannot be PETSC_DECIDE) 5424 . n - This value should be the same as the local size used in creating the 5425 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5426 calculated if N is given) For square matrices n is almost always m. 5427 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5428 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5429 . i - row indices for "diagonal" portion of matrix 5430 . j - column indices 5431 . a - matrix values 5432 . oi - row indices for "off-diagonal" portion of matrix 5433 . oj - column indices 5434 - oa - matrix values 5435 5436 Output Parameter: 5437 . mat - the matrix 5438 5439 Level: advanced 5440 5441 Notes: 5442 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5443 5444 The i and j indices are 0 based 5445 5446 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5447 5448 This sets local rows and cannot be used to set off-processor values. 5449 5450 You cannot later use MatSetValues() to change values in this matrix. 
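   A two-process sketch (illustrative only; it assumes, based on how the arrays are passed to
   MatCreateSeqAIJWithArrays() below, that j holds local column indices for the diagonal block while
   oj holds global column indices) that builds the 4 by 4 matrix
      row 0:  1 0 5 0
      row 1:  0 2 0 0
      row 2:  0 6 3 0
      row 3:  0 0 0 4
   with two rows and two columns owned by each process is

      Mat            mat;
      PetscErrorCode ierr;
      PetscMPIInt    rank;
      PetscInt       i[3],j[2],oi[3],oj[1];
      PetscScalar    a[2],oa[1];

      ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      if (!rank) {
        i[0] = 0; i[1] = 1; i[2] = 2;    j[0] = 0; j[1] = 1;    a[0] = 1.0; a[1] = 2.0;
        oi[0] = 0; oi[1] = 1; oi[2] = 1;    oj[0] = 2;    oa[0] = 5.0;
      } else {
        i[0] = 0; i[1] = 1; i[2] = 2;    j[0] = 0; j[1] = 1;    a[0] = 3.0; a[1] = 4.0;
        oi[0] = 0; oi[1] = 1; oi[2] = 1;    oj[0] = 1;    oa[0] = 6.0;
      }
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);

   Because the arrays are not copied, in real code they must remain valid for the lifetime of the matrix;
   the stack arrays above are for illustration only.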
5451 5452 .keywords: matrix, aij, compressed row, sparse, parallel 5453 5454 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5455 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5456 @*/ 5457 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5458 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5459 { 5460 PetscErrorCode ierr; 5461 Mat_MPIAIJ *maij; 5462 5463 PetscFunctionBegin; 5464 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5465 if (i[0]) { 5466 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5467 } 5468 if (oi[0]) { 5469 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5470 } 5471 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5472 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5473 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5474 maij = (Mat_MPIAIJ*) (*mat)->data; 5475 maij->donotstash = PETSC_TRUE; 5476 (*mat)->preallocated = PETSC_TRUE; 5477 5478 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5479 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5480 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5481 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5482 5483 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5484 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5485 5486 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5487 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5488 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5489 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5490 5491 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5492 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5493 PetscFunctionReturn(0); 5494 } 5495 5496 /* 5497 Special version for direct calls from Fortran 5498 */ 5499 #include "private/fortranimpl.h" 5500 5501 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5502 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5503 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5504 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5505 #endif 5506 5507 /* Change these macros so can be used in void function */ 5508 #undef CHKERRQ 5509 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5510 #undef SETERRQ2 5511 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5512 #undef SETERRQ 5513 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5514 5515 EXTERN_C_BEGIN 5516 #undef __FUNCT__ 5517 #define __FUNCT__ "matsetvaluesmpiaij_" 5518 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5519 { 5520 Mat mat = *mmat; 5521 PetscInt m = *mm, n = *mn; 5522 InsertMode addv = *maddv; 5523 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5524 PetscScalar value; 5525 PetscErrorCode ierr; 5526 5527 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5528 if (mat->insertmode == NOT_SET_VALUES) { 5529 mat->insertmode = addv; 5530 } 5531 #if defined(PETSC_USE_DEBUG) 5532 else if (mat->insertmode != addv) { 5533 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5534 } 5535 #endif 5536 { 5537 
PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5538 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5539 PetscBool roworiented = aij->roworiented; 5540 5541 /* Some Variables required in the macro */ 5542 Mat A = aij->A; 5543 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5544 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5545 MatScalar *aa = a->a; 5546 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5547 Mat B = aij->B; 5548 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5549 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5550 MatScalar *ba = b->a; 5551 5552 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5553 PetscInt nonew = a->nonew; 5554 MatScalar *ap1,*ap2; 5555 5556 PetscFunctionBegin; 5557 for (i=0; i<m; i++) { 5558 if (im[i] < 0) continue; 5559 #if defined(PETSC_USE_DEBUG) 5560 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5561 #endif 5562 if (im[i] >= rstart && im[i] < rend) { 5563 row = im[i] - rstart; 5564 lastcol1 = -1; 5565 rp1 = aj + ai[row]; 5566 ap1 = aa + ai[row]; 5567 rmax1 = aimax[row]; 5568 nrow1 = ailen[row]; 5569 low1 = 0; 5570 high1 = nrow1; 5571 lastcol2 = -1; 5572 rp2 = bj + bi[row]; 5573 ap2 = ba + bi[row]; 5574 rmax2 = bimax[row]; 5575 nrow2 = bilen[row]; 5576 low2 = 0; 5577 high2 = nrow2; 5578 5579 for (j=0; j<n; j++) { 5580 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5581 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5582 if (in[j] >= cstart && in[j] < cend){ 5583 col = in[j] - cstart; 5584 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5585 } else if (in[j] < 0) continue; 5586 #if defined(PETSC_USE_DEBUG) 5587 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5588 #endif 5589 else { 5590 if (mat->was_assembled) { 5591 if (!aij->colmap) { 5592 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5593 } 5594 #if defined (PETSC_USE_CTABLE) 5595 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5596 col--; 5597 #else 5598 col = aij->colmap[in[j]] - 1; 5599 #endif 5600 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5601 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5602 col = in[j]; 5603 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5604 B = aij->B; 5605 b = (Mat_SeqAIJ*)B->data; 5606 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5607 rp2 = bj + bi[row]; 5608 ap2 = ba + bi[row]; 5609 rmax2 = bimax[row]; 5610 nrow2 = bilen[row]; 5611 low2 = 0; 5612 high2 = nrow2; 5613 bm = aij->B->rmap->n; 5614 ba = b->a; 5615 } 5616 } else col = in[j]; 5617 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5618 } 5619 } 5620 } else { 5621 if (!aij->donotstash) { 5622 if (roworiented) { 5623 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5624 } else { 5625 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5626 } 5627 } 5628 } 5629 }} 5630 PetscFunctionReturnVoid(); 5631 } 5632 EXTERN_C_END 5633 5634
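/*
   Illustrative sketch, not part of the original source: the Fortran-callable routine above is a direct
   fast path for MatSetValues() on an MPIAIJ matrix.  Its C-level equivalent, including insertion into a
   row owned by another process (such values are stashed and exchanged during assembly, as in the
   stashing branch of the routine above), is roughly

      Mat            A;              a hypothetical MPIAIJ matrix, created and preallocated elsewhere
      PetscErrorCode ierr;
      PetscInt       row = 0,col = 0;
      PetscScalar    v   = 1.0;

      ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/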