1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "petscblaslapack.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatDistribute_MPIAIJ" 8 /* 9 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 10 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 11 12 Only for square matrices 13 */ 14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 15 { 16 PetscMPIInt rank,size; 17 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 18 PetscErrorCode ierr; 19 Mat mat; 20 Mat_SeqAIJ *gmata; 21 PetscMPIInt tag; 22 MPI_Status status; 23 PetscBool aij; 24 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 25 26 PetscFunctionBegin; 27 CHKMEMQ; 28 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 29 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 30 if (!rank) { 31 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 32 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 33 } 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 36 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 37 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 38 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 39 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 40 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 41 rowners[0] = 0; 42 for (i=2; i<=size; i++) { 43 rowners[i] += rowners[i-1]; 44 } 45 rstart = rowners[rank]; 46 rend = rowners[rank+1]; 47 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 48 if (!rank) { 49 gmata = (Mat_SeqAIJ*) gmat->data; 50 /* send row lengths to all processors */ 51 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 52 for (i=1; i<size; i++) { 53 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 54 } 55 /* determine number diagonal and off-diagonal counts */ 56 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 58 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 59 jj = 0; 60 for (i=0; i<m; i++) { 61 for (j=0; j<dlens[i]; j++) { 62 if (gmata->j[jj] < rstart) ld[i]++; 63 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 64 jj++; 65 } 66 } 67 /* send column indices to other processes */ 68 for (i=1; i<size; i++) { 69 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 70 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 72 } 73 74 /* send numerical values to other processes */ 75 for (i=1; i<size; i++) { 76 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 77 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 78 } 79 gmataa = gmata->a; 80 gmataj = gmata->j; 81 82 } else { 83 /* receive row lengths */ 84 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 85 /* receive column indices */ 86 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 87 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 88 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 89 /* determine number diagonal and off-diagonal counts */ 90 ierr = 
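  /*
     Clarifying note (added commentary, not PETSc documentation): ld[i] counts the
     entries of local row i whose global column lies to the left of this process's
     diagonal block [rstart,rend), while olens[i] counts all entries outside that
     block. A small worked example, assuming rstart = 4 and rend = 8: a row with
     global columns {1,4,6,9} gives ld = 1 (column 1) and olens = 2 (columns 1 and 9);
     after the "dlens[i] -= olens[i]" adjustment done before preallocation below,
     dlens = 2 (columns 4 and 6, the diagonal-block entries).
  */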
PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap->n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap->n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap->n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global 
column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable, at
   a slightly higher hash table cost; without it, it is not scalable (each processor
   has an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode
MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 276 { 277 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 278 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 279 PetscErrorCode ierr; 280 PetscInt l,*garray = mat->garray,diag; 281 282 PetscFunctionBegin; 283 /* code only works for square matrices A */ 284 285 /* find size of row to the left of the diagonal part */ 286 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 287 row = row - diag; 288 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 289 if (garray[b->j[b->i[row]+l]] > diag) break; 290 } 291 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 292 293 /* diagonal part */ 294 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 295 296 /* right of diagonal part */ 297 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 298 PetscFunctionReturn(0); 299 } 300 301 #undef __FUNCT__ 302 #define __FUNCT__ "MatSetValues_MPIAIJ" 303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 306 PetscScalar value; 307 PetscErrorCode ierr; 308 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 309 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 310 PetscBool roworiented = aij->roworiented; 311 312 /* Some Variables required in the macro */ 313 Mat A = aij->A; 314 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 315 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 316 MatScalar *aa = a->a; 317 PetscBool ignorezeroentries = a->ignorezeroentries; 318 Mat B = aij->B; 319 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 320 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 321 MatScalar *ba = b->a; 322 323 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 324 PetscInt nonew = a->nonew; 325 MatScalar *ap1,*ap2; 326 327 PetscFunctionBegin; 328 if (v) PetscValidScalarPointer(v,6); 329 for (i=0; i<m; i++) { 330 if (im[i] < 0) continue; 331 #if defined(PETSC_USE_DEBUG) 332 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 333 #endif 334 if (im[i] >= rstart && im[i] < rend) { 335 row = im[i] - rstart; 336 lastcol1 = -1; 337 rp1 = aj + ai[row]; 338 ap1 = aa + ai[row]; 339 rmax1 = aimax[row]; 340 nrow1 = ailen[row]; 341 low1 = 0; 342 high1 = nrow1; 343 lastcol2 = -1; 344 rp2 = bj + bi[row]; 345 ap2 = ba + bi[row]; 346 rmax2 = bimax[row]; 347 nrow2 = bilen[row]; 348 low2 = 0; 349 high2 = nrow2; 350 351 for (j=0; j<n; j++) { 352 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 353 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 354 if (in[j] >= cstart && in[j] < cend){ 355 col = in[j] - cstart; 356 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 357 } else if (in[j] < 0) continue; 358 #if defined(PETSC_USE_DEBUG) 359 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 360 #endif 361 else { 362 if (mat->was_assembled) { 363 if (!aij->colmap) { 364 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 365 } 366 #if defined (PETSC_USE_CTABLE) 367 ierr = 
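            /*
               Clarifying note (added commentary): colmap maps a global column number to
               "local off-diagonal column + 1"; the +1/-1 shift is used so that a stored
               value of 0 can mean "column not present" (and PetscTable cannot hold a
               value of 0). garray[] is the inverse map. A hypothetical example: if
               garray = {3,7,12}, looking up global column 7 yields col = 1, while a
               global column that is absent yields col = -1, which triggers the
               DisAssemble_MPIAIJ() path below when new nonzeros are allowed.
            */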
PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 368 col--; 369 #else 370 col = aij->colmap[in[j]] - 1; 371 #endif 372 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 373 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 374 col = in[j]; 375 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 376 B = aij->B; 377 b = (Mat_SeqAIJ*)B->data; 378 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 379 rp2 = bj + bi[row]; 380 ap2 = ba + bi[row]; 381 rmax2 = bimax[row]; 382 nrow2 = bilen[row]; 383 low2 = 0; 384 high2 = nrow2; 385 bm = aij->B->rmap->n; 386 ba = b->a; 387 } 388 } else col = in[j]; 389 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 390 } 391 } 392 } else { 393 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 394 if (!aij->donotstash) { 395 if (roworiented) { 396 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 397 } else { 398 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 399 } 400 } 401 } 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatGetValues_MPIAIJ" 408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 413 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 414 415 PetscFunctionBegin; 416 for (i=0; i<m; i++) { 417 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 418 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 419 if (idxm[i] >= rstart && idxm[i] < rend) { 420 row = idxm[i] - rstart; 421 for (j=0; j<n; j++) { 422 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 423 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 424 if (idxn[j] >= cstart && idxn[j] < cend){ 425 col = idxn[j] - cstart; 426 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 427 } else { 428 if (!aij->colmap) { 429 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 430 } 431 #if defined (PETSC_USE_CTABLE) 432 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 433 col --; 434 #else 435 col = aij->colmap[idxn[j]] - 1; 436 #endif 437 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 438 else { 439 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 440 } 441 } 442 } 443 } else { 444 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 445 } 446 } 447 PetscFunctionReturn(0); 448 } 449 450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash || mat->nooffprocentries) { 463 
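    /*
       Clarifying note (added commentary): MatAssemblyBegin only has work to do when
       off-process entries may have been cached in mat->stash by MatSetValues_MPIAIJ()
       above. If stashing is disabled, or the user promised via
       MatSetOption(mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) that no such entries exist,
       there is nothing to communicate and the routine returns immediately.
    */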
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 516 /* 517 if nonzero structure of submatrix B cannot change then we know that 518 no processor disassembled thus we can skip this stuff 519 */ 520 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 521 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 522 if (mat->was_assembled && !other_disassembled) { 523 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 524 } 525 } 526 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 527 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 528 } 529 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 530 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 531 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 532 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 533 534 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 535 aij->rowvalues = 0; 536 537 /* used by MatAXPY() */ 538 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 539 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 540 541 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 542 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 549 { 550 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 555 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatZeroRows_MPIAIJ" 561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 562 { 563 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 564 PetscErrorCode ierr; 565 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 566 PetscInt i,*owners = A->rmap->range; 567 PetscInt *nprocs,j,idx,nsends,row; 568 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 569 PetscInt *rvalues,count,base,slen,*source; 570 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 571 MPI_Comm comm = ((PetscObject)A)->comm; 572 MPI_Request *send_waits,*recv_waits; 573 MPI_Status recv_status,*send_status; 574 const PetscScalar *xx; 575 PetscScalar *bb; 576 #if defined(PETSC_DEBUG) 577 PetscBool found = PETSC_FALSE; 578 #endif 579 580 PetscFunctionBegin; 581 /* first count number of contributors to each processor */ 582 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 583 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 584 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 585 j = 0; 586 for (i=0; i<N; i++) { 587 if (lastidx > (idx = rows[i])) j = 0; 588 lastidx = idx; 589 for (; j<size; j++) { 590 if (idx >= owners[j] && idx < owners[j+1]) { 591 nprocs[2*j]++; 592 nprocs[2*j+1] = 1; 593 owner[i] = j; 594 #if defined(PETSC_DEBUG) 595 found = PETSC_TRUE; 596 #endif 597 break; 598 } 599 } 600 #if defined(PETSC_DEBUG) 601 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 602 found = PETSC_FALSE; 603 #endif 604 } 605 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 606 607 if (A->nooffproczerorows) { 608 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 609 
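    /*
       Clarifying note (added commentary): nprocs[] stores two integers per rank:
       nprocs[2*j] is how many of the requested rows are owned by rank j and
       nprocs[2*j+1] is a 0/1 flag marking whether anything is sent to rank j, so
       nsends counts the distinct destination ranks. With
       MAT_NO_OFF_PROC_ZERO_ROWS every requested row is owned locally, so at most one
       message (to ourselves) exists and the PetscMaxSum() rendezvous below can be
       skipped.
    */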
nrecvs = nsends; 610 nmax = N; 611 } else { 612 /* inform other processors of number of messages and max length*/ 613 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 614 } 615 616 /* post receives: */ 617 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 618 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 619 for (i=0; i<nrecvs; i++) { 620 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 621 } 622 623 /* do sends: 624 1) starts[i] gives the starting index in svalues for stuff going to 625 the ith processor 626 */ 627 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 628 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 629 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 630 starts[0] = 0; 631 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 632 for (i=0; i<N; i++) { 633 svalues[starts[owner[i]]++] = rows[i]; 634 } 635 636 starts[0] = 0; 637 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 638 count = 0; 639 for (i=0; i<size; i++) { 640 if (nprocs[2*i+1]) { 641 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 642 } 643 } 644 ierr = PetscFree(starts);CHKERRQ(ierr); 645 646 base = owners[rank]; 647 648 /* wait on receives */ 649 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 650 count = nrecvs; slen = 0; 651 while (count) { 652 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 653 /* unpack receives into our local space */ 654 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 655 source[imdex] = recv_status.MPI_SOURCE; 656 lens[imdex] = n; 657 slen += n; 658 count--; 659 } 660 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 661 662 /* move the data into the send scatter */ 663 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 664 count = 0; 665 for (i=0; i<nrecvs; i++) { 666 values = rvalues + i*nmax; 667 for (j=0; j<lens[i]; j++) { 668 lrows[count++] = values[j] - base; 669 } 670 } 671 ierr = PetscFree(rvalues);CHKERRQ(ierr); 672 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 673 ierr = PetscFree(owner);CHKERRQ(ierr); 674 ierr = PetscFree(nprocs);CHKERRQ(ierr); 675 676 /* fix right hand side if needed */ 677 if (x && b) { 678 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 679 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 680 for (i=0; i<N; i++) { 681 bb[lrows[i]] = diag*xx[lrows[i]]; 682 } 683 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 684 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 685 } 686 /* 687 Zero the required rows. If the "diagonal block" of the matrix 688 is square and the user wishes to set the diagonal we use separate 689 code so that MatSetValues() is not called for each diagonal allocating 690 new memory, thus calling lots of mallocs and slowing things down. 
691 692 */ 693 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 694 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 695 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 696 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 697 } else if (diag != 0.0) { 698 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 699 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 700 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 701 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 702 } 703 for (i = 0; i < slen; i++) { 704 row = lrows[i] + rstart; 705 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 706 } 707 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 708 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 709 } else { 710 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 711 } 712 ierr = PetscFree(lrows);CHKERRQ(ierr); 713 714 /* wait on sends */ 715 if (nsends) { 716 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 717 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 718 ierr = PetscFree(send_status);CHKERRQ(ierr); 719 } 720 ierr = PetscFree(send_waits);CHKERRQ(ierr); 721 ierr = PetscFree(svalues);CHKERRQ(ierr); 722 723 PetscFunctionReturn(0); 724 } 725 726 #undef __FUNCT__ 727 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 728 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 729 { 730 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 731 PetscErrorCode ierr; 732 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 733 PetscInt i,*owners = A->rmap->range; 734 PetscInt *nprocs,j,idx,nsends,row; 735 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 736 PetscInt *rvalues,count,base,slen,*source; 737 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 738 MPI_Comm comm = ((PetscObject)A)->comm; 739 MPI_Request *send_waits,*recv_waits; 740 MPI_Status recv_status,*send_status; 741 const PetscScalar *xx; 742 PetscScalar *bb; 743 #if defined(PETSC_DEBUG) 744 PetscBool found = PETSC_FALSE; 745 #endif 746 747 PetscFunctionBegin; 748 /* first count number of contributors to each processor */ 749 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 750 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 751 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 752 j = 0; 753 for (i=0; i<N; i++) { 754 if (lastidx > (idx = rows[i])) j = 0; 755 lastidx = idx; 756 for (; j<size; j++) { 757 if (idx >= owners[j] && idx < owners[j+1]) { 758 nprocs[2*j]++; 759 nprocs[2*j+1] = 1; 760 owner[i] = j; 761 #if defined(PETSC_DEBUG) 762 found = PETSC_TRUE; 763 #endif 764 break; 765 } 766 } 767 #if defined(PETSC_DEBUG) 768 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 769 found = PETSC_FALSE; 770 #endif 771 } 772 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 773 774 if (A->nooffproczerorows) { 775 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 776 nrecvs = nsends; 777 nmax = N; 778 } else { 779 /* inform other processors of number of messages and max length*/ 780 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 781 } 782 783 /* post receives: */ 784 ierr = 
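  /*
     Clarifying note (added commentary): the receive buffer is laid out as nrecvs
     fixed-size slots of nmax integers each; the message matched by recv_waits[i]
     lands at rvalues + nmax*i, and lens[i] and source[i] later record how many row
     indices actually arrived and from which rank, so shorter messages simply leave
     the tail of their slot unused.
  */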
PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 785 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 786 for (i=0; i<nrecvs; i++) { 787 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 788 } 789 790 /* do sends: 791 1) starts[i] gives the starting index in svalues for stuff going to 792 the ith processor 793 */ 794 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 795 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 796 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 797 starts[0] = 0; 798 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 799 for (i=0; i<N; i++) { 800 svalues[starts[owner[i]]++] = rows[i]; 801 } 802 803 starts[0] = 0; 804 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 805 count = 0; 806 for (i=0; i<size; i++) { 807 if (nprocs[2*i+1]) { 808 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 809 } 810 } 811 ierr = PetscFree(starts);CHKERRQ(ierr); 812 813 base = owners[rank]; 814 815 /* wait on receives */ 816 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 817 count = nrecvs; slen = 0; 818 while (count) { 819 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 820 /* unpack receives into our local space */ 821 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 822 source[imdex] = recv_status.MPI_SOURCE; 823 lens[imdex] = n; 824 slen += n; 825 count--; 826 } 827 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 828 829 /* move the data into the send scatter */ 830 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 831 count = 0; 832 for (i=0; i<nrecvs; i++) { 833 values = rvalues + i*nmax; 834 for (j=0; j<lens[i]; j++) { 835 lrows[count++] = values[j] - base; 836 } 837 } 838 ierr = PetscFree(rvalues);CHKERRQ(ierr); 839 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 840 ierr = PetscFree(owner);CHKERRQ(ierr); 841 ierr = PetscFree(nprocs);CHKERRQ(ierr); 842 843 /* fix right hand side if needed */ 844 if (x && b) { 845 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 846 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 847 for (i=0; i<N; i++) { 848 bb[lrows[i]] = diag*xx[lrows[i]]; 849 } 850 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 851 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 852 } 853 /* 854 Zero the required rows. If the "diagonal block" of the matrix 855 is square and the user wishes to set the diagonal we use separate 856 code so that MatSetValues() is not called for each diagonal allocating 857 new memory, thus calling lots of mallocs and slowing things down. 
858 859 */ 860 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 861 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 862 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 863 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 864 } else if (diag != 0.0) { 865 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 866 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 867 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 868 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 869 } 870 for (i = 0; i < slen; i++) { 871 row = lrows[i] + rstart; 872 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 873 } 874 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 875 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 876 } else { 877 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 878 } 879 ierr = PetscFree(lrows);CHKERRQ(ierr); 880 881 /* wait on sends */ 882 if (nsends) { 883 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 884 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 885 ierr = PetscFree(send_status);CHKERRQ(ierr); 886 } 887 ierr = PetscFree(send_waits);CHKERRQ(ierr); 888 ierr = PetscFree(svalues);CHKERRQ(ierr); 889 890 PetscFunctionReturn(0); 891 } 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "MatMult_MPIAIJ" 895 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 896 { 897 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 898 PetscErrorCode ierr; 899 PetscInt nt; 900 901 PetscFunctionBegin; 902 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 903 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 904 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 906 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 907 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 #undef __FUNCT__ 912 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 913 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 914 { 915 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 916 PetscErrorCode ierr; 917 918 PetscFunctionBegin; 919 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 920 PetscFunctionReturn(0); 921 } 922 923 #undef __FUNCT__ 924 #define __FUNCT__ "MatMultAdd_MPIAIJ" 925 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 926 { 927 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 928 PetscErrorCode ierr; 929 930 PetscFunctionBegin; 931 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 932 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 933 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 934 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 935 PetscFunctionReturn(0); 936 } 937 938 #undef __FUNCT__ 939 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 940 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 941 { 942 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 943 PetscErrorCode ierr; 944 PetscBool merged; 945 946 PetscFunctionBegin; 947 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 948 /* do nondiagonal part */ 949 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 950 if (!merged) { 951 
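      /*
         Clarifying note (added commentary): in this branch the scatter is not "merged",
         so VecScatterBegin() only initiates the reverse communication and the remote
         contributions are added to yy in VecScatterEnd(); the local A^T*x product can
         therefore be computed while messages are in flight. In the merged branch below,
         Begin() already moves the data, so yy must hold the local product before it is
         called and the trailing End() is only a no-op kept for symmetry.
      */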
/* send it on its way */ 952 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 953 /* do local part */ 954 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 955 /* receive remote parts: note this assumes the values are not actually */ 956 /* added in yy until the next line, */ 957 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 958 } else { 959 /* do local part */ 960 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 961 /* send it on its way */ 962 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 963 /* values actually were received in the Begin() but we need to call this nop */ 964 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 965 } 966 PetscFunctionReturn(0); 967 } 968 969 EXTERN_C_BEGIN 970 #undef __FUNCT__ 971 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 972 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 973 { 974 MPI_Comm comm; 975 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 976 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 977 IS Me,Notme; 978 PetscErrorCode ierr; 979 PetscInt M,N,first,last,*notme,i; 980 PetscMPIInt size; 981 982 PetscFunctionBegin; 983 984 /* Easy test: symmetric diagonal block */ 985 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 986 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 987 if (!*f) PetscFunctionReturn(0); 988 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 989 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 990 if (size == 1) PetscFunctionReturn(0); 991 992 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 993 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 994 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 995 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 996 for (i=0; i<first; i++) notme[i] = i; 997 for (i=last; i<M; i++) notme[i-last+first] = i; 998 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 999 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1000 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1001 Aoff = Aoffs[0]; 1002 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1003 Boff = Boffs[0]; 1004 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1005 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1006 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1007 ierr = ISDestroy(Me);CHKERRQ(ierr); 1008 ierr = ISDestroy(Notme);CHKERRQ(ierr); 1009 1010 PetscFunctionReturn(0); 1011 } 1012 EXTERN_C_END 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1016 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1017 { 1018 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1019 PetscErrorCode ierr; 1020 1021 PetscFunctionBegin; 1022 /* do nondiagonal part */ 1023 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1024 /* send it on its way */ 1025 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1026 /* do local part */ 1027 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1028 /* receive remote parts */ 1029 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1030 PetscFunctionReturn(0); 1031 } 1032 1033 /* 1034 This only works correctly for square matrices 
where the subblock A->A is the 1035 diagonal block 1036 */ 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1039 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1040 { 1041 PetscErrorCode ierr; 1042 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1043 1044 PetscFunctionBegin; 1045 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1046 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1047 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatScale_MPIAIJ" 1053 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1054 { 1055 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1056 PetscErrorCode ierr; 1057 1058 PetscFunctionBegin; 1059 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1060 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1061 PetscFunctionReturn(0); 1062 } 1063 1064 #undef __FUNCT__ 1065 #define __FUNCT__ "MatDestroy_MPIAIJ" 1066 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1067 { 1068 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1069 PetscErrorCode ierr; 1070 1071 PetscFunctionBegin; 1072 #if defined(PETSC_USE_LOG) 1073 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1074 #endif 1075 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1076 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 1077 if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);} 1078 if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);} 1079 #if defined (PETSC_USE_CTABLE) 1080 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 1081 #else 1082 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1083 #endif 1084 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1085 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 1086 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 1087 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1088 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1089 ierr = PetscFree(aij);CHKERRQ(ierr); 1090 1091 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1092 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1093 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1094 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1095 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1096 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1097 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1098 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1099 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1105 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1106 { 1107 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1108 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1109 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1110 PetscErrorCode ierr; 1111 PetscMPIInt rank,size,tag = 
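  /*
     Clarifying note (added commentary): the routine below writes the standard PETSc
     binary matrix layout, described here as reconstructed from the code rather than
     from separate documentation: a 4-entry header {MAT_FILE_CLASSID, M, N, total
     nonzeros}, then the lengths of every global row, then the global column indices
     row by row (off-diagonal columns left of the diagonal block, then the diagonal
     block, then the remaining off-diagonal columns), then the numerical values in the
     same order.
  */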
((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar       *column_values;
  PetscInt          message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* rank 0 needs a buffer as large as the largest per-process nz */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr =
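  /*
     Clarifying note (added commentary): the PetscViewerFlowControl calls implement a
     master/worker throttle. Rank 0 writes its own block, then receives and writes each
     remaining rank's block in turn into the single column_indices buffer of length
     nzmax, while the StepMaster/StepWorker handshake paces the workers so that only a
     limited number of ranks send to rank 0 at the same time and its memory use stays
     bounded.
  */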
PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1189 for (i=1; i<size; i++) { 1190 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1191 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1192 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1193 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1194 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1195 } 1196 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1197 } else { 1198 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1199 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1200 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1201 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1202 } 1203 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1204 1205 /* load up the local column values */ 1206 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1207 cnt = 0; 1208 for (i=0; i<mat->rmap->n; i++) { 1209 for (j=B->i[i]; j<B->i[i+1]; j++) { 1210 if ( garray[B->j[j]] > cstart) break; 1211 column_values[cnt++] = B->a[j]; 1212 } 1213 for (k=A->i[i]; k<A->i[i+1]; k++) { 1214 column_values[cnt++] = A->a[k]; 1215 } 1216 for (; j<B->i[i+1]; j++) { 1217 column_values[cnt++] = B->a[j]; 1218 } 1219 } 1220 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1221 1222 /* store the column values to the file */ 1223 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1224 if (!rank) { 1225 MPI_Status status; 1226 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1227 for (i=1; i<size; i++) { 1228 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1229 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1230 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1231 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1232 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1233 } 1234 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1235 } else { 1236 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1237 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1238 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1239 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1240 } 1241 ierr = PetscFree(column_values);CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1247 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1248 { 1249 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1250 PetscErrorCode ierr; 1251 PetscMPIInt rank = aij->rank,size = aij->size; 1252 PetscBool isdraw,iascii,isbinary; 1253 PetscViewer sviewer; 1254 PetscViewerFormat format; 1255 1256 PetscFunctionBegin; 1257 ierr = 
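  /*
     Clarifying note (added commentary): for ASCII viewers with the INFO_DETAIL format
     the code below prints, per rank, the local row count, the number of nonzeros used
     and allocated, the memory footprint, the split of nonzeros between the on-diagonal
     (aij->A) and off-diagonal (aij->B) blocks, and whether I-node routines are in use,
     followed by a view of the VecScatter used in matrix-vector products.
  */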
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1258 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1259 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1260 if (iascii) { 1261 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1262 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1263 MatInfo info; 1264 PetscBool inodes; 1265 1266 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1267 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1268 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1269 if (!inodes) { 1270 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1271 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1272 } else { 1273 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1274 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1275 } 1276 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1277 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1278 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1279 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1280 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1281 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1282 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1285 PetscInt inodecount,inodelimit,*inodes; 1286 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1287 if (inodes) { 1288 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1289 } else { 1290 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1291 } 1292 PetscFunctionReturn(0); 1293 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1294 PetscFunctionReturn(0); 1295 } 1296 } else if (isbinary) { 1297 if (size == 1) { 1298 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1299 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1300 } else { 1301 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1302 } 1303 PetscFunctionReturn(0); 1304 } else if (isdraw) { 1305 PetscDraw draw; 1306 PetscBool isnull; 1307 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1308 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1309 } 1310 1311 if (size == 1) { 1312 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1313 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1314 } else { 1315 /* assemble the entire matrix onto first processor. 
*/ 1316 Mat A; 1317 Mat_SeqAIJ *Aloc; 1318 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1319 MatScalar *a; 1320 1321 if (mat->rmap->N > 1024) { 1322 PetscBool flg = PETSC_FALSE; 1323 1324 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1325 if (!flg) { 1326 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1327 } 1328 } 1329 1330 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1331 if (!rank) { 1332 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1333 } else { 1334 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1335 } 1336 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1337 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1338 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1339 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1340 1341 /* copy over the A part */ 1342 Aloc = (Mat_SeqAIJ*)aij->A->data; 1343 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1344 row = mat->rmap->rstart; 1345 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1346 for (i=0; i<m; i++) { 1347 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1348 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1349 } 1350 aj = Aloc->j; 1351 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1352 1353 /* copy over the B part */ 1354 Aloc = (Mat_SeqAIJ*)aij->B->data; 1355 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1356 row = mat->rmap->rstart; 1357 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1358 ct = cols; 1359 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1360 for (i=0; i<m; i++) { 1361 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1362 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1363 } 1364 ierr = PetscFree(ct);CHKERRQ(ierr); 1365 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1366 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1367 /* 1368 Everyone has to call to draw the matrix since the graphics waits are 1369 synchronized across all processors that share the PetscDraw object 1370 */ 1371 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1372 if (!rank) { 1373 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1374 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1375 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1376 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1377 } 1378 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1379 ierr = MatDestroy(A);CHKERRQ(ierr); 1380 } 1381 PetscFunctionReturn(0); 1382 } 1383 1384 #undef __FUNCT__ 1385 #define __FUNCT__ "MatView_MPIAIJ" 1386 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1387 { 1388 PetscErrorCode ierr; 1389 PetscBool iascii,isdraw,issocket,isbinary; 1390 1391 PetscFunctionBegin; 1392 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1393 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1394 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1395 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1396 if (iascii || isdraw || isbinary || issocket) { 1397 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1398 } else { 1399 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1400 } 1401 PetscFunctionReturn(0); 1402 } 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatSOR_MPIAIJ" 1406 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1407 { 1408 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1409 PetscErrorCode ierr; 1410 Vec bb1 = 0; 1411 PetscBool hasop; 1412 1413 PetscFunctionBegin; 1414 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1415 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1416 } 1417 1418 if (flag == SOR_APPLY_UPPER) { 1419 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1420 PetscFunctionReturn(0); 1421 } 1422 1423 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1424 if (flag & SOR_ZERO_INITIAL_GUESS) { 1425 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1426 its--; 1427 } 1428 1429 while (its--) { 1430 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1431 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1432 1433 /* update rhs: bb1 = bb - B*x */ 1434 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1435 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1436 1437 /* local sweep */ 1438 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1439 } 1440 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1441 if (flag & SOR_ZERO_INITIAL_GUESS) { 1442 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1443 its--; 1444 } 1445 while (its--) { 1446 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1447 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1448 1449 /* update rhs: bb1 = bb - B*x */ 1450 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1451 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1452 1453 /* local sweep */ 1454 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1455 } 1456 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1457 if (flag & SOR_ZERO_INITIAL_GUESS) { 1458 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1459 its--; 1460 } 1461 while (its--) { 1462 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1463 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1464 1465 /* update rhs: bb1 = bb - B*x */ 1466 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1467 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1468 1469 /* local sweep */ 1470 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1471 } 1472 } else if (flag & SOR_EISENSTAT) { 1473 Vec xx1; 1474 1475 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1476 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1477 1478 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1479 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1480 if (!mat->diag) { 1481 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1482 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1483 } 1484 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1485 if (hasop) { 1486 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1487 } else { 1488 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1489 } 1490 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1491 1492 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1493 1494 /* local sweep */ 1495 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1496 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1497 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1498 } else { 1499 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1500 } 1501 1502 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1503 PetscFunctionReturn(0); 1504 } 1505 1506 #undef __FUNCT__ 1507 #define __FUNCT__ "MatPermute_MPIAIJ" 1508 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1509 { 1510 MPI_Comm comm,pcomm; 1511 PetscInt first,local_size,nrows; 1512 const PetscInt *rows; 1513 PetscMPIInt size; 1514 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1515 PetscErrorCode ierr; 1516 1517 PetscFunctionBegin; 1518 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1519 /* make a collective version of 'rowp' */ 1520 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1521 if (pcomm==comm) { 1522 crowp = rowp; 1523 } else { 1524 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1525 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1526 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 1527 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1528 } 1529 /* collect the global row permutation and invert it */ 1530 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1531 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1532 if (pcomm!=comm) { 1533 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1534 } 1535 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1536 /* get the local target indices */ 1537 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1538 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1539 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1540 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1541 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1542 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1543 /* the column permutation is so much easier; 1544 make a local version of 'colp' and invert it */ 1545 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1546 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1547 if (size==1) { 1548 lcolp = colp; 1549 } else { 1550 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1551 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1552 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 1553 } 1554 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1555 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1556 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1557 if (size>1) { 1558 ierr = 
ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1559 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1560 } 1561 /* now we just get the submatrix */ 1562 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1563 /* clean up */ 1564 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1565 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1566 PetscFunctionReturn(0); 1567 } 1568 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1571 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1572 { 1573 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1574 Mat A = mat->A,B = mat->B; 1575 PetscErrorCode ierr; 1576 PetscReal isend[5],irecv[5]; 1577 1578 PetscFunctionBegin; 1579 info->block_size = 1.0; 1580 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1581 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1582 isend[3] = info->memory; isend[4] = info->mallocs; 1583 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1584 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1585 isend[3] += info->memory; isend[4] += info->mallocs; 1586 if (flag == MAT_LOCAL) { 1587 info->nz_used = isend[0]; 1588 info->nz_allocated = isend[1]; 1589 info->nz_unneeded = isend[2]; 1590 info->memory = isend[3]; 1591 info->mallocs = isend[4]; 1592 } else if (flag == MAT_GLOBAL_MAX) { 1593 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1594 info->nz_used = irecv[0]; 1595 info->nz_allocated = irecv[1]; 1596 info->nz_unneeded = irecv[2]; 1597 info->memory = irecv[3]; 1598 info->mallocs = irecv[4]; 1599 } else if (flag == MAT_GLOBAL_SUM) { 1600 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1601 info->nz_used = irecv[0]; 1602 info->nz_allocated = irecv[1]; 1603 info->nz_unneeded = irecv[2]; 1604 info->memory = irecv[3]; 1605 info->mallocs = irecv[4]; 1606 } 1607 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1608 info->fill_ratio_needed = 0; 1609 info->factor_mallocs = 0; 1610 1611 PetscFunctionReturn(0); 1612 } 1613 1614 #undef __FUNCT__ 1615 #define __FUNCT__ "MatSetOption_MPIAIJ" 1616 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1617 { 1618 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1619 PetscErrorCode ierr; 1620 1621 PetscFunctionBegin; 1622 switch (op) { 1623 case MAT_NEW_NONZERO_LOCATIONS: 1624 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1625 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1626 case MAT_KEEP_NONZERO_PATTERN: 1627 case MAT_NEW_NONZERO_LOCATION_ERR: 1628 case MAT_USE_INODES: 1629 case MAT_IGNORE_ZERO_ENTRIES: 1630 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1631 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1632 break; 1633 case MAT_ROW_ORIENTED: 1634 a->roworiented = flg; 1635 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1636 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1637 break; 1638 case MAT_NEW_DIAGONALS: 1639 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1640 break; 1641 case MAT_IGNORE_OFF_PROC_ENTRIES: 1642 a->donotstash = PETSC_TRUE; 1643 break; 1644 case MAT_SPD: 1645 A->spd_set = PETSC_TRUE; 1646 A->spd = flg; 1647 if (flg) { 1648 A->symmetric = PETSC_TRUE; 1649 A->structurally_symmetric = PETSC_TRUE; 1650 A->symmetric_set = PETSC_TRUE; 1651 A->structurally_symmetric_set = PETSC_TRUE; 1652 } 1653 break; 1654 case MAT_SYMMETRIC: 1655 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1656 break; 1657 case 
MAT_STRUCTURALLY_SYMMETRIC: 1658 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1659 break; 1660 case MAT_HERMITIAN: 1661 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1662 break; 1663 case MAT_SYMMETRY_ETERNAL: 1664 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1665 break; 1666 default: 1667 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1668 } 1669 PetscFunctionReturn(0); 1670 } 1671 1672 #undef __FUNCT__ 1673 #define __FUNCT__ "MatGetRow_MPIAIJ" 1674 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1675 { 1676 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1677 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1678 PetscErrorCode ierr; 1679 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1680 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1681 PetscInt *cmap,*idx_p; 1682 1683 PetscFunctionBegin; 1684 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1685 mat->getrowactive = PETSC_TRUE; 1686 1687 if (!mat->rowvalues && (idx || v)) { 1688 /* 1689 allocate enough space to hold information from the longest row. 1690 */ 1691 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1692 PetscInt max = 1,tmp; 1693 for (i=0; i<matin->rmap->n; i++) { 1694 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1695 if (max < tmp) { max = tmp; } 1696 } 1697 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1698 } 1699 1700 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1701 lrow = row - rstart; 1702 1703 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1704 if (!v) {pvA = 0; pvB = 0;} 1705 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1706 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1707 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1708 nztot = nzA + nzB; 1709 1710 cmap = mat->garray; 1711 if (v || idx) { 1712 if (nztot) { 1713 /* Sort by increasing column numbers, assuming A and B already sorted */ 1714 PetscInt imark = -1; 1715 if (v) { 1716 *v = v_p = mat->rowvalues; 1717 for (i=0; i<nzB; i++) { 1718 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1719 else break; 1720 } 1721 imark = i; 1722 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1723 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1724 } 1725 if (idx) { 1726 *idx = idx_p = mat->rowindices; 1727 if (imark > -1) { 1728 for (i=0; i<imark; i++) { 1729 idx_p[i] = cmap[cworkB[i]]; 1730 } 1731 } else { 1732 for (i=0; i<nzB; i++) { 1733 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1734 else break; 1735 } 1736 imark = i; 1737 } 1738 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1739 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1740 } 1741 } else { 1742 if (idx) *idx = 0; 1743 if (v) *v = 0; 1744 } 1745 } 1746 *nz = nztot; 1747 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1748 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1749 PetscFunctionReturn(0); 1750 } 1751 1752 #undef __FUNCT__ 1753 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1754 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1755 { 1756 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1757 1758 PetscFunctionBegin; 1759 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must 
be called first"); 1760 aij->getrowactive = PETSC_FALSE; 1761 PetscFunctionReturn(0); 1762 } 1763 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "MatNorm_MPIAIJ" 1766 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1767 { 1768 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1769 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1770 PetscErrorCode ierr; 1771 PetscInt i,j,cstart = mat->cmap->rstart; 1772 PetscReal sum = 0.0; 1773 MatScalar *v; 1774 1775 PetscFunctionBegin; 1776 if (aij->size == 1) { 1777 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1778 } else { 1779 if (type == NORM_FROBENIUS) { 1780 v = amat->a; 1781 for (i=0; i<amat->nz; i++) { 1782 #if defined(PETSC_USE_COMPLEX) 1783 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1784 #else 1785 sum += (*v)*(*v); v++; 1786 #endif 1787 } 1788 v = bmat->a; 1789 for (i=0; i<bmat->nz; i++) { 1790 #if defined(PETSC_USE_COMPLEX) 1791 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1792 #else 1793 sum += (*v)*(*v); v++; 1794 #endif 1795 } 1796 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1797 *norm = sqrt(*norm); 1798 } else if (type == NORM_1) { /* max column norm */ 1799 PetscReal *tmp,*tmp2; 1800 PetscInt *jj,*garray = aij->garray; 1801 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1802 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1803 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1804 *norm = 0.0; 1805 v = amat->a; jj = amat->j; 1806 for (j=0; j<amat->nz; j++) { 1807 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1808 } 1809 v = bmat->a; jj = bmat->j; 1810 for (j=0; j<bmat->nz; j++) { 1811 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1812 } 1813 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1814 for (j=0; j<mat->cmap->N; j++) { 1815 if (tmp2[j] > *norm) *norm = tmp2[j]; 1816 } 1817 ierr = PetscFree(tmp);CHKERRQ(ierr); 1818 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1819 } else if (type == NORM_INFINITY) { /* max row norm */ 1820 PetscReal ntemp = 0.0; 1821 for (j=0; j<aij->A->rmap->n; j++) { 1822 v = amat->a + amat->i[j]; 1823 sum = 0.0; 1824 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1825 sum += PetscAbsScalar(*v); v++; 1826 } 1827 v = bmat->a + bmat->i[j]; 1828 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1829 sum += PetscAbsScalar(*v); v++; 1830 } 1831 if (sum > ntemp) ntemp = sum; 1832 } 1833 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1834 } else { 1835 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1836 } 1837 } 1838 PetscFunctionReturn(0); 1839 } 1840 1841 #undef __FUNCT__ 1842 #define __FUNCT__ "MatTranspose_MPIAIJ" 1843 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1844 { 1845 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1846 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1847 PetscErrorCode ierr; 1848 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1849 PetscInt cstart=A->cmap->rstart,ncol; 1850 Mat B; 1851 MatScalar *array; 1852 1853 PetscFunctionBegin; 1854 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1855 1856 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1857 ai = Aloc->i; aj = Aloc->j; 1858 bi = Bloc->i; bj = Bloc->j; 1859 if (reuse 
== MAT_INITIAL_MATRIX || *matout == A) { 1860 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1861 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1862 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1863 for (i=0; i<ai[ma]; i++){ 1864 d_nnz[aj[i]] ++; 1865 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1866 } 1867 1868 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1869 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1870 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1871 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1872 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1873 } else { 1874 B = *matout; 1875 } 1876 1877 /* copy over the A part */ 1878 array = Aloc->a; 1879 row = A->rmap->rstart; 1880 for (i=0; i<ma; i++) { 1881 ncol = ai[i+1]-ai[i]; 1882 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1883 row++; array += ncol; aj += ncol; 1884 } 1885 aj = Aloc->j; 1886 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1887 1888 /* copy over the B part */ 1889 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1890 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1891 array = Bloc->a; 1892 row = A->rmap->rstart; 1893 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1894 cols_tmp = cols; 1895 for (i=0; i<mb; i++) { 1896 ncol = bi[i+1]-bi[i]; 1897 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1898 row++; array += ncol; cols_tmp += ncol; 1899 } 1900 ierr = PetscFree(cols);CHKERRQ(ierr); 1901 1902 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1903 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1904 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1905 *matout = B; 1906 } else { 1907 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1908 } 1909 PetscFunctionReturn(0); 1910 } 1911 1912 #undef __FUNCT__ 1913 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1914 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1915 { 1916 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1917 Mat a = aij->A,b = aij->B; 1918 PetscErrorCode ierr; 1919 PetscInt s1,s2,s3; 1920 1921 PetscFunctionBegin; 1922 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1923 if (rr) { 1924 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1925 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1926 /* Overlap communication with computation. 
*/ 1927 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1928 } 1929 if (ll) { 1930 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1931 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1932 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1933 } 1934 /* scale the diagonal block */ 1935 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1936 1937 if (rr) { 1938 /* Do a scatter end and then right scale the off-diagonal block */ 1939 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1940 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1941 } 1942 1943 PetscFunctionReturn(0); 1944 } 1945 1946 #undef __FUNCT__ 1947 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1948 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1949 { 1950 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1951 PetscErrorCode ierr; 1952 1953 PetscFunctionBegin; 1954 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1955 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1956 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1957 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1958 PetscFunctionReturn(0); 1959 } 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1962 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1963 { 1964 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1965 PetscErrorCode ierr; 1966 1967 PetscFunctionBegin; 1968 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatEqual_MPIAIJ" 1974 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 1975 { 1976 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1977 Mat a,b,c,d; 1978 PetscBool flg; 1979 PetscErrorCode ierr; 1980 1981 PetscFunctionBegin; 1982 a = matA->A; b = matA->B; 1983 c = matB->A; d = matB->B; 1984 1985 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1986 if (flg) { 1987 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1988 } 1989 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1990 PetscFunctionReturn(0); 1991 } 1992 1993 #undef __FUNCT__ 1994 #define __FUNCT__ "MatCopy_MPIAIJ" 1995 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1996 { 1997 PetscErrorCode ierr; 1998 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1999 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2000 2001 PetscFunctionBegin; 2002 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2003 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2004 /* because of the column compression in the off-processor part of the matrix a->B, 2005 the number of columns in a->B and b->B may be different, hence we cannot call 2006 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2007 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2008 then copying the submatrices */ 2009 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2010 } else { 2011 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2012 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2013 } 2014 PetscFunctionReturn(0); 2015 } 2016 2017 #undef __FUNCT__ 2018 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2019 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2020 { 2021 PetscErrorCode ierr; 2022 2023 PetscFunctionBegin; 2024 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2025 PetscFunctionReturn(0); 2026 } 2027 2028 #undef __FUNCT__ 2029 #define __FUNCT__ "MatAXPY_MPIAIJ" 2030 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2031 { 2032 PetscErrorCode ierr; 2033 PetscInt i; 2034 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2035 PetscBLASInt bnz,one=1; 2036 Mat_SeqAIJ *x,*y; 2037 2038 PetscFunctionBegin; 2039 if (str == SAME_NONZERO_PATTERN) { 2040 PetscScalar alpha = a; 2041 x = (Mat_SeqAIJ *)xx->A->data; 2042 y = (Mat_SeqAIJ *)yy->A->data; 2043 bnz = PetscBLASIntCast(x->nz); 2044 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2045 x = (Mat_SeqAIJ *)xx->B->data; 2046 y = (Mat_SeqAIJ *)yy->B->data; 2047 bnz = PetscBLASIntCast(x->nz); 2048 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2049 } else if (str == SUBSET_NONZERO_PATTERN) { 2050 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2051 2052 x = (Mat_SeqAIJ *)xx->B->data; 2053 y = (Mat_SeqAIJ *)yy->B->data; 2054 if (y->xtoy && y->XtoY != xx->B) { 2055 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2056 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2057 } 2058 if (!y->xtoy) { /* get xtoy */ 2059 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2060 y->XtoY = xx->B; 2061 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2062 } 2063 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2064 } else { 2065 Mat B; 2066 PetscInt *nnz_d,*nnz_o; 2067 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2068 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2069 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2070 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2071 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2072 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2073 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2074 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2075 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 2076 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2077 ierr = MatHeaderReplace(Y,B); 2078 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2079 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2080 } 2081 PetscFunctionReturn(0); 2082 } 2083 2084 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 2085 2086 #undef __FUNCT__ 2087 #define __FUNCT__ "MatConjugate_MPIAIJ" 2088 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 2089 { 2090 #if defined(PETSC_USE_COMPLEX) 2091 PetscErrorCode ierr; 2092 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2093 2094 PetscFunctionBegin; 2095 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2096 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2097 #else 2098 PetscFunctionBegin; 2099 
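  /* With real scalars there is nothing to conjugate, so this branch is intentionally empty;
     the complex branch above applies MatConjugate_SeqAIJ() to both the diagonal (A) and
     off-diagonal (B) blocks.  A minimal usage sketch through the public entry point
     (illustrative caller code, not part of this routine; 'M' is a hypothetical assembled
     MPIAIJ matrix):

         ierr = MatConjugate(M);CHKERRQ(ierr);

     MatConjugate() dispatches to this routine for MATMPIAIJ. */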
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
    This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
  ierr = PetscContainerSetPointer(c, lgraph_p);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat
B,Mat A, const MatFactorInfo *info) 2192 { 2193 PetscFunctionBegin; 2194 PetscFunctionReturn(0); 2195 } 2196 2197 #undef __FUNCT__ 2198 #define __FUNCT__ "MatSolve_MPIAIJ" 2199 /* 2200 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2201 */ 2202 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2203 { 2204 namespace graph_dist = boost::graph::distributed; 2205 2206 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2207 lgraph_type* lgraph_p; 2208 PetscContainer c; 2209 PetscErrorCode ierr; 2210 2211 PetscFunctionBegin; 2212 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2213 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2214 ierr = VecCopy(b, x);CHKERRQ(ierr); 2215 2216 PetscScalar* array_x; 2217 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2218 PetscInt sx; 2219 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2220 2221 PetscScalar* array_b; 2222 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2223 PetscInt sb; 2224 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2225 2226 lgraph_type& level_graph = *lgraph_p; 2227 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2228 2229 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2230 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2231 ref_x(array_x, boost::extents[num_vertices(graph)]); 2232 2233 typedef boost::iterator_property_map<array_ref_type::iterator, 2234 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2235 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2236 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2237 2238 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2239 2240 PetscFunctionReturn(0); 2241 } 2242 #endif 2243 2244 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2245 PetscInt nzlocal,nsends,nrecvs; 2246 PetscMPIInt *send_rank,*recv_rank; 2247 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2248 PetscScalar *sbuf_a,**rbuf_a; 2249 PetscErrorCode (*MatDestroy)(Mat); 2250 } Mat_Redundant; 2251 2252 #undef __FUNCT__ 2253 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2254 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2255 { 2256 PetscErrorCode ierr; 2257 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2258 PetscInt i; 2259 2260 PetscFunctionBegin; 2261 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2262 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2263 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2264 for (i=0; i<redund->nrecvs; i++){ 2265 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2266 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2267 } 2268 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2269 ierr = PetscFree(redund);CHKERRQ(ierr); 2270 PetscFunctionReturn(0); 2271 } 2272 2273 #undef __FUNCT__ 2274 #define __FUNCT__ "MatDestroy_MatRedundant" 2275 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2276 { 2277 PetscErrorCode ierr; 2278 PetscContainer container; 2279 Mat_Redundant *redund=PETSC_NULL; 2280 2281 PetscFunctionBegin; 2282 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2283 if (container) { 2284 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2285 } else { 2286 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2287 } 2288 A->ops->destroy = redund->MatDestroy; 2289 
ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 2341 2342 nsends = redund->nsends; 2343 nrecvs = redund->nrecvs; 2344 send_rank = redund->send_rank; 2345 recv_rank = redund->recv_rank; 2346 sbuf_nz = redund->sbuf_nz; 2347 rbuf_nz = redund->rbuf_nz; 2348 sbuf_j = redund->sbuf_j; 2349 sbuf_a = redund->sbuf_a; 2350 rbuf_j = redund->rbuf_j; 2351 rbuf_a = redund->rbuf_a; 2352 } 2353 2354 if (reuse == MAT_INITIAL_MATRIX){ 2355 PetscMPIInt subrank,subsize; 2356 PetscInt nleftover,np_subcomm; 2357 /* get the destination processors' id send_rank, nsends and nrecvs */ 2358 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2359 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2360 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2361 np_subcomm = size/nsubcomm; 2362 nleftover = size - nsubcomm*np_subcomm; 2363 nsends = 0; nrecvs = 0; 2364 for (i=0; i<size; i++){ /* i=rank*/ 2365 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2366 send_rank[nsends] = i; nsends++; 2367 recv_rank[nrecvs++] = i; 2368 } 2369 } 2370 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2371 i = size-nleftover-1; 2372 j = 0; 2373 while (j < nsubcomm - nleftover){ 2374 send_rank[nsends++] = i; 2375 i--; j++; 2376 } 2377 } 2378 2379 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2380 for (i=0; i<nleftover; i++){ 2381 recv_rank[nrecvs++] = size-nleftover+i; 2382 } 2383 } 2384 2385 /* allocate sbuf_j, sbuf_a */ 2386 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2387 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2388 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2389 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2390 2391 /* copy mat's local entries into the buffers */ 2392 if (reuse == MAT_INITIAL_MATRIX){ 2393 rownz_max = 0; 2394 rptr = sbuf_j; 2395 cols = sbuf_j + rend-rstart + 1; 2396 vals = sbuf_a; 2397 rptr[0] = 0; 2398 for (i=0; i<rend-rstart; i++){ 2399 row = i + rstart; 2400 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2401 ncols = nzA + nzB; 2402 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2403 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2404 /* load the column indices for this row into cols */ 2405 lwrite = 0; 2406 for (l=0; l<nzB; l++) { 2407 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2408 vals[lwrite] = aworkB[l]; 2409 cols[lwrite++] = ctmp; 2410 } 2411 } 2412 for (l=0; l<nzA; l++){ 2413 vals[lwrite] = aworkA[l]; 2414 cols[lwrite++] = cstart + cworkA[l]; 2415 } 2416 for (l=0; l<nzB; l++) { 2417 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2418 vals[lwrite] = aworkB[l]; 2419 cols[lwrite++] = ctmp; 2420 } 2421 } 2422 vals += ncols; 2423 cols += ncols; 2424 rptr[i+1] = rptr[i] + ncols; 2425 if (rownz_max < ncols) rownz_max = ncols; 2426 } 2427 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2428 } else { /* only copy matrix values into sbuf_a */ 2429 rptr = sbuf_j; 2430 vals = sbuf_a; 2431 rptr[0] = 0; 2432 for (i=0; i<rend-rstart; i++){ 2433 row = i + rstart; 2434 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2435 ncols = nzA + nzB; 2436 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2437 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2438 lwrite = 0; 2439 for (l=0; l<nzB; l++) { 2440 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2441 } 2442 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2443 for 
(l=0; l<nzB; l++) { 2444 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2445 } 2446 vals += ncols; 2447 rptr[i+1] = rptr[i] + ncols; 2448 } 2449 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2450 2451 /* send nzlocal to others, and recv other's nzlocal */ 2452 /*--------------------------------------------------*/ 2453 if (reuse == MAT_INITIAL_MATRIX){ 2454 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2455 s_waits2 = s_waits3 + nsends; 2456 s_waits1 = s_waits2 + nsends; 2457 r_waits1 = s_waits1 + nsends; 2458 r_waits2 = r_waits1 + nrecvs; 2459 r_waits3 = r_waits2 + nrecvs; 2460 } else { 2461 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2462 r_waits3 = s_waits3 + nsends; 2463 } 2464 2465 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2466 if (reuse == MAT_INITIAL_MATRIX){ 2467 /* get new tags to keep the communication clean */ 2468 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2469 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2470 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2471 2472 /* post receives of other's nzlocal */ 2473 for (i=0; i<nrecvs; i++){ 2474 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2475 } 2476 /* send nzlocal to others */ 2477 for (i=0; i<nsends; i++){ 2478 sbuf_nz[i] = nzlocal; 2479 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2480 } 2481 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2482 count = nrecvs; 2483 while (count) { 2484 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2485 recv_rank[imdex] = recv_status.MPI_SOURCE; 2486 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2487 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2488 2489 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2490 rbuf_nz[imdex] += i + 2; 2491 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2492 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2493 count--; 2494 } 2495 /* wait on sends of nzlocal */ 2496 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2497 /* send mat->i,j to others, and recv from other's */ 2498 /*------------------------------------------------*/ 2499 for (i=0; i<nsends; i++){ 2500 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2501 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2502 } 2503 /* wait on receives of mat->i,j */ 2504 /*------------------------------*/ 2505 count = nrecvs; 2506 while (count) { 2507 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2508 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2509 count--; 2510 } 2511 /* wait on sends of mat->i,j */ 2512 /*---------------------------*/ 2513 if (nsends) { 2514 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2515 } 2516 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2517 2518 /* post receives, send and receive mat->a */ 2519 
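/* The value exchange below follows the standard nonblocking pattern: post one MPI_Irecv per
   expected contributor, start the MPI_Isend's, then drain completions with MPI_Waitany while
   checking that each completed receive came from the rank recorded during the first
   (MAT_INITIAL_MATRIX) pass.  A generic sketch of the idiom, with placeholder names
   (rbuf, len, src, dst, rreq, sreq, sstat, idx, stat are illustrative only):

       for (i=0; i<nrecvs; i++) MPI_Irecv(rbuf[i],len[i],MPIU_SCALAR,src[i],tag,comm,&rreq[i]);
       for (i=0; i<nsends; i++) MPI_Isend(sbuf,slen,MPIU_SCALAR,dst[i],tag,comm,&sreq[i]);
       while (count--)          MPI_Waitany(nrecvs,rreq,&idx,&stat);
       if (nsends)              MPI_Waitall(nsends,sreq,sstat);
*/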
/*----------------------------------------*/ 2520 for (imdex=0; imdex<nrecvs; imdex++) { 2521 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2522 } 2523 for (i=0; i<nsends; i++){ 2524 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2525 } 2526 count = nrecvs; 2527 while (count) { 2528 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2529 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2530 count--; 2531 } 2532 if (nsends) { 2533 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2534 } 2535 2536 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2537 2538 /* create redundant matrix */ 2539 /*-------------------------*/ 2540 if (reuse == MAT_INITIAL_MATRIX){ 2541 /* compute rownz_max for preallocation */ 2542 for (imdex=0; imdex<nrecvs; imdex++){ 2543 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2544 rptr = rbuf_j[imdex]; 2545 for (i=0; i<j; i++){ 2546 ncols = rptr[i+1] - rptr[i]; 2547 if (rownz_max < ncols) rownz_max = ncols; 2548 } 2549 } 2550 2551 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2552 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2553 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2554 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2555 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2556 } else { 2557 C = *matredundant; 2558 } 2559 2560 /* insert local matrix entries */ 2561 rptr = sbuf_j; 2562 cols = sbuf_j + rend-rstart + 1; 2563 vals = sbuf_a; 2564 for (i=0; i<rend-rstart; i++){ 2565 row = i + rstart; 2566 ncols = rptr[i+1] - rptr[i]; 2567 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2568 vals += ncols; 2569 cols += ncols; 2570 } 2571 /* insert received matrix entries */ 2572 for (imdex=0; imdex<nrecvs; imdex++){ 2573 rstart = rowrange[recv_rank[imdex]]; 2574 rend = rowrange[recv_rank[imdex]+1]; 2575 rptr = rbuf_j[imdex]; 2576 cols = rbuf_j[imdex] + rend-rstart + 1; 2577 vals = rbuf_a[imdex]; 2578 for (i=0; i<rend-rstart; i++){ 2579 row = i + rstart; 2580 ncols = rptr[i+1] - rptr[i]; 2581 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2582 vals += ncols; 2583 cols += ncols; 2584 } 2585 } 2586 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2587 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2588 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2589 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2590 if (reuse == MAT_INITIAL_MATRIX){ 2591 PetscContainer container; 2592 *matredundant = C; 2593 /* create a supporting struct and attach it to C for reuse */ 2594 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2595 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2596 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2597 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2598 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2599 2600 redund->nzlocal = nzlocal; 2601 redund->nsends = nsends; 2602 redund->nrecvs = nrecvs; 2603 redund->send_rank = send_rank; 2604 redund->recv_rank = recv_rank; 
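    /* Caching these buffers lets a subsequent call with MAT_REUSE_MATRIX skip the index
       exchange and resend only the numerical values.  A typical calling sequence through the
       public interface (illustrative sketch; MatGetRedundantMatrix() is the documented entry
       point and the arguments must match those of the initial call):

           ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
           ...change numerical entries of mat, keeping its nonzero pattern...
           ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
    */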
2605 redund->sbuf_nz = sbuf_nz; 2606 redund->rbuf_nz = rbuf_nz; 2607 redund->sbuf_j = sbuf_j; 2608 redund->sbuf_a = sbuf_a; 2609 redund->rbuf_j = rbuf_j; 2610 redund->rbuf_a = rbuf_a; 2611 2612 redund->MatDestroy = C->ops->destroy; 2613 C->ops->destroy = MatDestroy_MatRedundant; 2614 } 2615 PetscFunctionReturn(0); 2616 } 2617 2618 #undef __FUNCT__ 2619 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2620 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2621 { 2622 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2623 PetscErrorCode ierr; 2624 PetscInt i,*idxb = 0; 2625 PetscScalar *va,*vb; 2626 Vec vtmp; 2627 2628 PetscFunctionBegin; 2629 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2630 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2631 if (idx) { 2632 for (i=0; i<A->rmap->n; i++) { 2633 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2634 } 2635 } 2636 2637 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2638 if (idx) { 2639 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2640 } 2641 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2642 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2643 2644 for (i=0; i<A->rmap->n; i++){ 2645 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2646 va[i] = vb[i]; 2647 if (idx) idx[i] = a->garray[idxb[i]]; 2648 } 2649 } 2650 2651 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2652 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2653 if (idxb) { 2654 ierr = PetscFree(idxb);CHKERRQ(ierr); 2655 } 2656 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2657 PetscFunctionReturn(0); 2658 } 2659 2660 #undef __FUNCT__ 2661 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2662 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2663 { 2664 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2665 PetscErrorCode ierr; 2666 PetscInt i,*idxb = 0; 2667 PetscScalar *va,*vb; 2668 Vec vtmp; 2669 2670 PetscFunctionBegin; 2671 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2672 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2673 if (idx) { 2674 for (i=0; i<A->cmap->n; i++) { 2675 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2676 } 2677 } 2678 2679 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2680 if (idx) { 2681 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2682 } 2683 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2684 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2685 2686 for (i=0; i<A->rmap->n; i++){ 2687 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2688 va[i] = vb[i]; 2689 if (idx) idx[i] = a->garray[idxb[i]]; 2690 } 2691 } 2692 2693 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2694 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2695 if (idxb) { 2696 ierr = PetscFree(idxb);CHKERRQ(ierr); 2697 } 2698 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2699 PetscFunctionReturn(0); 2700 } 2701 2702 #undef __FUNCT__ 2703 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2704 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2705 { 2706 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2707 PetscInt n = A->rmap->n; 2708 PetscInt cstart = A->cmap->rstart; 2709 PetscInt *cmap = mat->garray; 2710 PetscInt *diagIdx, *offdiagIdx; 2711 Vec diagV, offdiagV; 2712 PetscScalar *a, *diagA, *offdiagA; 2713 PetscInt r; 2714 PetscErrorCode ierr; 2715 2716 PetscFunctionBegin; 2717 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2718 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2719 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2720 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2721 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2722 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2723 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2724 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2725 for(r = 0; r < n; ++r) { 2726 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2727 a[r] = diagA[r]; 2728 idx[r] = cstart + diagIdx[r]; 2729 } else { 2730 a[r] = offdiagA[r]; 2731 idx[r] = cmap[offdiagIdx[r]]; 2732 } 2733 } 2734 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2735 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2736 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2737 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2738 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2739 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2740 PetscFunctionReturn(0); 2741 } 2742 2743 #undef __FUNCT__ 2744 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2745 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2746 { 2747 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2748 PetscInt n = A->rmap->n; 2749 PetscInt cstart = A->cmap->rstart; 2750 PetscInt *cmap = mat->garray; 2751 PetscInt *diagIdx, *offdiagIdx; 2752 Vec diagV, offdiagV; 2753 PetscScalar *a, *diagA, *offdiagA; 2754 PetscInt r; 2755 PetscErrorCode ierr; 2756 2757 PetscFunctionBegin; 2758 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2759 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2760 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2761 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2762 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2763 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2764 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2765 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2766 for(r = 0; r < n; ++r) { 2767 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2768 a[r] = diagA[r]; 2769 idx[r] = cstart + diagIdx[r]; 2770 } else { 2771 a[r] = offdiagA[r]; 2772 idx[r] = cmap[offdiagIdx[r]]; 2773 } 2774 } 2775 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2776 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2777 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2778 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2779 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2780 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2781 PetscFunctionReturn(0); 2782 } 2783 2784 #undef __FUNCT__ 2785 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2786 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2787 { 2788 PetscErrorCode ierr; 2789 Mat *dummy; 2790 2791 PetscFunctionBegin; 2792 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2793 *newmat = *dummy; 2794 ierr = PetscFree(dummy);CHKERRQ(ierr); 2795 PetscFunctionReturn(0); 2796 } 2797 2798 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2799 /* -------------------------------------------------------------------*/ 2800 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2801 MatGetRow_MPIAIJ, 2802 MatRestoreRow_MPIAIJ, 2803 MatMult_MPIAIJ, 2804 /* 4*/ MatMultAdd_MPIAIJ, 2805 MatMultTranspose_MPIAIJ, 2806 MatMultTransposeAdd_MPIAIJ, 2807 #ifdef PETSC_HAVE_PBGL 2808 MatSolve_MPIAIJ, 2809 #else 2810 0, 2811 #endif 2812 0, 2813 0, 2814 /*10*/ 0, 2815 0, 2816 0, 2817 
MatSOR_MPIAIJ, 2818 MatTranspose_MPIAIJ, 2819 /*15*/ MatGetInfo_MPIAIJ, 2820 MatEqual_MPIAIJ, 2821 MatGetDiagonal_MPIAIJ, 2822 MatDiagonalScale_MPIAIJ, 2823 MatNorm_MPIAIJ, 2824 /*20*/ MatAssemblyBegin_MPIAIJ, 2825 MatAssemblyEnd_MPIAIJ, 2826 MatSetOption_MPIAIJ, 2827 MatZeroEntries_MPIAIJ, 2828 /*24*/ MatZeroRows_MPIAIJ, 2829 0, 2830 #ifdef PETSC_HAVE_PBGL 2831 0, 2832 #else 2833 0, 2834 #endif 2835 0, 2836 0, 2837 /*29*/ MatSetUpPreallocation_MPIAIJ, 2838 #ifdef PETSC_HAVE_PBGL 2839 0, 2840 #else 2841 0, 2842 #endif 2843 0, 2844 0, 2845 0, 2846 /*34*/ MatDuplicate_MPIAIJ, 2847 0, 2848 0, 2849 0, 2850 0, 2851 /*39*/ MatAXPY_MPIAIJ, 2852 MatGetSubMatrices_MPIAIJ, 2853 MatIncreaseOverlap_MPIAIJ, 2854 MatGetValues_MPIAIJ, 2855 MatCopy_MPIAIJ, 2856 /*44*/ MatGetRowMax_MPIAIJ, 2857 MatScale_MPIAIJ, 2858 0, 2859 0, 2860 0, 2861 /*49*/ MatSetBlockSize_MPIAIJ, 2862 0, 2863 0, 2864 0, 2865 0, 2866 /*54*/ MatFDColoringCreate_MPIAIJ, 2867 0, 2868 MatSetUnfactored_MPIAIJ, 2869 MatPermute_MPIAIJ, 2870 0, 2871 /*59*/ MatGetSubMatrix_MPIAIJ, 2872 MatDestroy_MPIAIJ, 2873 MatView_MPIAIJ, 2874 0, 2875 0, 2876 /*64*/ 0, 2877 0, 2878 0, 2879 0, 2880 0, 2881 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2882 MatGetRowMinAbs_MPIAIJ, 2883 0, 2884 MatSetColoring_MPIAIJ, 2885 #if defined(PETSC_HAVE_ADIC) 2886 MatSetValuesAdic_MPIAIJ, 2887 #else 2888 0, 2889 #endif 2890 MatSetValuesAdifor_MPIAIJ, 2891 /*75*/ MatFDColoringApply_AIJ, 2892 0, 2893 0, 2894 0, 2895 0, 2896 /*80*/ 0, 2897 0, 2898 0, 2899 /*83*/ MatLoad_MPIAIJ, 2900 0, 2901 0, 2902 0, 2903 0, 2904 0, 2905 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2906 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2907 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2908 MatPtAP_Basic, 2909 MatPtAPSymbolic_MPIAIJ, 2910 /*94*/ MatPtAPNumeric_MPIAIJ, 2911 0, 2912 0, 2913 0, 2914 0, 2915 /*99*/ 0, 2916 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2917 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2918 MatConjugate_MPIAIJ, 2919 0, 2920 /*104*/MatSetValuesRow_MPIAIJ, 2921 MatRealPart_MPIAIJ, 2922 MatImaginaryPart_MPIAIJ, 2923 0, 2924 0, 2925 /*109*/0, 2926 MatGetRedundantMatrix_MPIAIJ, 2927 MatGetRowMin_MPIAIJ, 2928 0, 2929 0, 2930 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2931 0, 2932 0, 2933 0, 2934 0, 2935 /*119*/0, 2936 0, 2937 0, 2938 0, 2939 MatGetMultiProcBlock_MPIAIJ 2940 }; 2941 2942 /* ----------------------------------------------------------------------------------------*/ 2943 2944 EXTERN_C_BEGIN 2945 #undef __FUNCT__ 2946 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2947 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2948 { 2949 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2950 PetscErrorCode ierr; 2951 2952 PetscFunctionBegin; 2953 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2954 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2955 PetscFunctionReturn(0); 2956 } 2957 EXTERN_C_END 2958 2959 EXTERN_C_BEGIN 2960 #undef __FUNCT__ 2961 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2962 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2963 { 2964 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2965 PetscErrorCode ierr; 2966 2967 PetscFunctionBegin; 2968 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2969 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2970 PetscFunctionReturn(0); 2971 } 2972 EXTERN_C_END 2973 2974 EXTERN_C_BEGIN 2975 #undef __FUNCT__ 2976 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2977 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2978 { 2979 Mat_MPIAIJ *b; 2980 PetscErrorCode ierr; 2981 PetscInt 
i; 2982 2983 PetscFunctionBegin; 2984 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2985 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2986 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2987 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2988 2989 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2990 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2991 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2992 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2993 if (d_nnz) { 2994 for (i=0; i<B->rmap->n; i++) { 2995 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2996 } 2997 } 2998 if (o_nnz) { 2999 for (i=0; i<B->rmap->n; i++) { 3000 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3001 } 3002 } 3003 b = (Mat_MPIAIJ*)B->data; 3004 3005 if (!B->preallocated) { 3006 /* Explicitly create 2 MATSEQAIJ matrices. */ 3007 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3008 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3009 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3010 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3011 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3012 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3013 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3014 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3015 } 3016 3017 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3018 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3019 B->preallocated = PETSC_TRUE; 3020 PetscFunctionReturn(0); 3021 } 3022 EXTERN_C_END 3023 3024 #undef __FUNCT__ 3025 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3026 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3027 { 3028 Mat mat; 3029 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3030 PetscErrorCode ierr; 3031 3032 PetscFunctionBegin; 3033 *newmat = 0; 3034 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3035 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3036 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3037 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3038 a = (Mat_MPIAIJ*)mat->data; 3039 3040 mat->factortype = matin->factortype; 3041 mat->rmap->bs = matin->rmap->bs; 3042 mat->assembled = PETSC_TRUE; 3043 mat->insertmode = NOT_SET_VALUES; 3044 mat->preallocated = PETSC_TRUE; 3045 3046 a->size = oldmat->size; 3047 a->rank = oldmat->rank; 3048 a->donotstash = oldmat->donotstash; 3049 a->roworiented = oldmat->roworiented; 3050 a->rowindices = 0; 3051 a->rowvalues = 0; 3052 a->getrowactive = PETSC_FALSE; 3053 3054 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3055 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3056 3057 if (oldmat->colmap) { 3058 #if defined (PETSC_USE_CTABLE) 3059 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3060 #else 3061 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3062 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3063 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3064 #endif 3065 } else a->colmap = 0; 3066 if (oldmat->garray) { 3067 PetscInt len; 3068 len = oldmat->B->cmap->n; 3069 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3070 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3071 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3072 } else a->garray = 0; 3073 3074 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3075 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3076 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3077 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3078 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3079 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3080 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3081 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3082 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3083 *newmat = mat; 3084 PetscFunctionReturn(0); 3085 } 3086 3087 /* 3088 Allows sending/receiving larger messages then 2 gigabytes in a single call 3089 */ 3090 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm) 3091 { 3092 int ierr; 3093 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 3094 PetscInt i,numchunks; 3095 PetscMPIInt icnt; 3096 3097 numchunks = cnt/CHUNKSIZE + 1; 3098 for (i=0; i<numchunks; i++) { 3099 icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 3100 ierr = MPI_Send(mess,icnt,type,to,tag,comm); 3101 if (type == MPIU_INT) { 3102 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 3103 } else if (type == MPIU_SCALAR) { 3104 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 3105 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 3106 } 3107 return 0; 3108 } 3109 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm) 3110 { 3111 int ierr; 3112 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 3113 MPI_Status status; 3114 PetscInt i,numchunks; 3115 PetscMPIInt icnt; 3116 3117 numchunks = cnt/CHUNKSIZE + 1; 3118 for (i=0; i<numchunks; i++) { 3119 icnt = PetscMPIIntCast((i < numchunks-1) ? 
CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 3120 ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status); 3121 if (type == MPIU_INT) { 3122 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 3123 } else if (type == MPIU_SCALAR) { 3124 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 3125 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 3126 } 3127 return 0; 3128 } 3129 3130 #undef __FUNCT__ 3131 #define __FUNCT__ "MatLoad_MPIAIJ" 3132 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3133 { 3134 PetscScalar *vals,*svals; 3135 MPI_Comm comm = ((PetscObject)viewer)->comm; 3136 PetscErrorCode ierr; 3137 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3138 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3139 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3140 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3141 PetscInt cend,cstart,n,*rowners,sizesset=1; 3142 int fd; 3143 3144 PetscFunctionBegin; 3145 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3146 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3147 if (!rank) { 3148 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3149 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3150 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3151 } 3152 3153 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3154 3155 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3156 M = header[1]; N = header[2]; 3157 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3158 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3159 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3160 3161 /* If global sizes are set, check if they are consistent with that given in the file */ 3162 if (sizesset) { 3163 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3164 } 3165 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3166 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3167 3168 /* determine ownership of all rows */ 3169 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3170 else m = newMat->rmap->n; /* Set by user */ 3171 3172 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3173 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3174 3175 /* First process needs enough room for process with most rows */ 3176 if (!rank) { 3177 mmax = rowners[1]; 3178 for (i=2; i<size; i++) { 3179 mmax = PetscMax(mmax,rowners[i]); 3180 } 3181 } else mmax = m; 3182 3183 rowners[0] = 0; 3184 for (i=2; i<=size; i++) { 3185 rowners[i] += rowners[i-1]; 3186 } 3187 rstart = rowners[rank]; 3188 rend = rowners[rank+1]; 3189 3190 /* distribute row lengths to all processors */ 3191 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3192 if (!rank) { 3193 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3194 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3195 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3196 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3197 for (j=0; 
j<m; j++) { 3198 procsnz[0] += ourlens[j]; 3199 } 3200 for (i=1; i<size; i++) { 3201 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3202 /* calculate the number of nonzeros on each processor */ 3203 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3204 procsnz[i] += rowlengths[j]; 3205 } 3206 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3207 } 3208 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3209 } else { 3210 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3211 } 3212 3213 if (!rank) { 3214 /* determine max buffer needed and allocate it */ 3215 maxnz = 0; 3216 for (i=0; i<size; i++) { 3217 maxnz = PetscMax(maxnz,procsnz[i]); 3218 } 3219 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3220 3221 /* read in my part of the matrix column indices */ 3222 nz = procsnz[0]; 3223 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3224 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3225 3226 /* read in every one elses and ship off */ 3227 for (i=1; i<size; i++) { 3228 nz = procsnz[i]; 3229 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3230 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3231 } 3232 ierr = PetscFree(cols);CHKERRQ(ierr); 3233 } else { 3234 /* determine buffer space needed for message */ 3235 nz = 0; 3236 for (i=0; i<m; i++) { 3237 nz += ourlens[i]; 3238 } 3239 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3240 3241 /* receive message of column indices*/ 3242 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3243 } 3244 3245 /* determine column ownership if matrix is not square */ 3246 if (N != M) { 3247 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3248 else n = newMat->cmap->n; 3249 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3250 cstart = cend - n; 3251 } else { 3252 cstart = rstart; 3253 cend = rend; 3254 n = cend - cstart; 3255 } 3256 3257 /* loop over local rows, determining number of off diagonal entries */ 3258 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3259 jj = 0; 3260 for (i=0; i<m; i++) { 3261 for (j=0; j<ourlens[i]; j++) { 3262 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3263 jj++; 3264 } 3265 } 3266 3267 for (i=0; i<m; i++) { 3268 ourlens[i] -= offlens[i]; 3269 } 3270 if (!sizesset) { 3271 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3272 } 3273 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3274 3275 for (i=0; i<m; i++) { 3276 ourlens[i] += offlens[i]; 3277 } 3278 3279 if (!rank) { 3280 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3281 3282 /* read in my part of the matrix numerical values */ 3283 nz = procsnz[0]; 3284 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3285 3286 /* insert into matrix */ 3287 jj = rstart; 3288 smycols = mycols; 3289 svals = vals; 3290 for (i=0; i<m; i++) { 3291 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3292 smycols += ourlens[i]; 3293 svals += ourlens[i]; 3294 jj++; 3295 } 3296 3297 /* read in other processors and ship out */ 3298 for (i=1; i<size; i++) { 3299 nz = procsnz[i]; 3300 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3301 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3302 } 3303 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3304 } else { 3305 /* receive numeric values */ 3306 ierr 
= PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3307 3308 /* receive message of values*/ 3309 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3310 3311 /* insert into matrix */ 3312 jj = rstart; 3313 smycols = mycols; 3314 svals = vals; 3315 for (i=0; i<m; i++) { 3316 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3317 smycols += ourlens[i]; 3318 svals += ourlens[i]; 3319 jj++; 3320 } 3321 } 3322 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3323 ierr = PetscFree(vals);CHKERRQ(ierr); 3324 ierr = PetscFree(mycols);CHKERRQ(ierr); 3325 ierr = PetscFree(rowners);CHKERRQ(ierr); 3326 3327 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3328 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3329 PetscFunctionReturn(0); 3330 } 3331 3332 #undef __FUNCT__ 3333 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3334 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3335 { 3336 PetscErrorCode ierr; 3337 IS iscol_local; 3338 PetscInt csize; 3339 3340 PetscFunctionBegin; 3341 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3342 if (call == MAT_REUSE_MATRIX) { 3343 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3344 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3345 } else { 3346 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3347 } 3348 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3349 if (call == MAT_INITIAL_MATRIX) { 3350 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3351 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3352 } 3353 PetscFunctionReturn(0); 3354 } 3355 3356 #undef __FUNCT__ 3357 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3358 /* 3359 Not great since it makes two copies of the submatrix, first an SeqAIJ 3360 in local and then by concatenating the local matrices the end result. 3361 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3362 3363 Note: This requires a sequential iscol with all indices. 
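          (MatGetSubMatrix_MPIAIJ() above is the routine that builds this sequential iscol,
          using ISAllGather(), and caches it on the resulting submatrix under the key
          "ISAllGather" so that a later MAT_REUSE_MATRIX call can retrieve it with
          PetscObjectQuery().)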
3364 */ 3365 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3366 { 3367 PetscErrorCode ierr; 3368 PetscMPIInt rank,size; 3369 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3370 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3371 Mat *local,M,Mreuse; 3372 MatScalar *vwork,*aa; 3373 MPI_Comm comm = ((PetscObject)mat)->comm; 3374 Mat_SeqAIJ *aij; 3375 3376 3377 PetscFunctionBegin; 3378 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3379 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3380 3381 if (call == MAT_REUSE_MATRIX) { 3382 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3383 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3384 local = &Mreuse; 3385 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3386 } else { 3387 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3388 Mreuse = *local; 3389 ierr = PetscFree(local);CHKERRQ(ierr); 3390 } 3391 3392 /* 3393 m - number of local rows 3394 n - number of columns (same on all processors) 3395 rstart - first row in new global matrix generated 3396 */ 3397 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3398 if (call == MAT_INITIAL_MATRIX) { 3399 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3400 ii = aij->i; 3401 jj = aij->j; 3402 3403 /* 3404 Determine the number of non-zeros in the diagonal and off-diagonal 3405 portions of the matrix in order to do correct preallocation 3406 */ 3407 3408 /* first get start and end of "diagonal" columns */ 3409 if (csize == PETSC_DECIDE) { 3410 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3411 if (mglobal == n) { /* square matrix */ 3412 nlocal = m; 3413 } else { 3414 nlocal = n/size + ((n % size) > rank); 3415 } 3416 } else { 3417 nlocal = csize; 3418 } 3419 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3420 rstart = rend - nlocal; 3421 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3422 3423 /* next, compute all the lengths */ 3424 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3425 olens = dlens + m; 3426 for (i=0; i<m; i++) { 3427 jend = ii[i+1] - ii[i]; 3428 olen = 0; 3429 dlen = 0; 3430 for (j=0; j<jend; j++) { 3431 if (*jj < rstart || *jj >= rend) olen++; 3432 else dlen++; 3433 jj++; 3434 } 3435 olens[i] = olen; 3436 dlens[i] = dlen; 3437 } 3438 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3439 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3440 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3441 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3442 ierr = PetscFree(dlens);CHKERRQ(ierr); 3443 } else { 3444 PetscInt ml,nl; 3445 3446 M = *newmat; 3447 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3448 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3449 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3450 /* 3451 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3452 rather than the slower MatSetValues(). 
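       (Roughly: with was_assembled set, MatSetValues_MPIAIJ() translates off-diagonal column
       indices through the existing colmap of the reused B block instead of rebuilding its
       nonzero structure, while clearing 'assembled' permits the insertions below.)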
3453 */ 3454 M->was_assembled = PETSC_TRUE; 3455 M->assembled = PETSC_FALSE; 3456 } 3457 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3458 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3459 ii = aij->i; 3460 jj = aij->j; 3461 aa = aij->a; 3462 for (i=0; i<m; i++) { 3463 row = rstart + i; 3464 nz = ii[i+1] - ii[i]; 3465 cwork = jj; jj += nz; 3466 vwork = aa; aa += nz; 3467 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3468 } 3469 3470 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3471 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3472 *newmat = M; 3473 3474 /* save submatrix used in processor for next request */ 3475 if (call == MAT_INITIAL_MATRIX) { 3476 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3477 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3478 } 3479 3480 PetscFunctionReturn(0); 3481 } 3482 3483 EXTERN_C_BEGIN 3484 #undef __FUNCT__ 3485 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3486 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3487 { 3488 PetscInt m,cstart, cend,j,nnz,i,d; 3489 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3490 const PetscInt *JJ; 3491 PetscScalar *values; 3492 PetscErrorCode ierr; 3493 3494 PetscFunctionBegin; 3495 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3496 3497 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3498 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3499 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3500 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3501 m = B->rmap->n; 3502 cstart = B->cmap->rstart; 3503 cend = B->cmap->rend; 3504 rstart = B->rmap->rstart; 3505 3506 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3507 3508 #if defined(PETSC_USE_DEBUGGING) 3509 for (i=0; i<m; i++) { 3510 nnz = Ii[i+1]- Ii[i]; 3511 JJ = J + Ii[i]; 3512 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3513 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3514 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3515 } 3516 #endif 3517 3518 for (i=0; i<m; i++) { 3519 nnz = Ii[i+1]- Ii[i]; 3520 JJ = J + Ii[i]; 3521 nnz_max = PetscMax(nnz_max,nnz); 3522 d = 0; 3523 for (j=0; j<nnz; j++) { 3524 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3525 } 3526 d_nnz[i] = d; 3527 o_nnz[i] = nnz - d; 3528 } 3529 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3530 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3531 3532 if (v) values = (PetscScalar*)v; 3533 else { 3534 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3535 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3536 } 3537 3538 for (i=0; i<m; i++) { 3539 ii = i + rstart; 3540 nnz = Ii[i+1]- Ii[i]; 3541 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3542 } 3543 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3544 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3545 3546 if (!v) { 3547 ierr = PetscFree(values);CHKERRQ(ierr); 3548 } 3549 PetscFunctionReturn(0); 3550 } 3551 EXTERN_C_END 3552 3553 #undef __FUNCT__ 3554 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3555 /*@ 3556 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3557 (the default parallel PETSc format). 3558 3559 Collective on MPI_Comm 3560 3561 Input Parameters: 3562 + B - the matrix 3563 . i - the indices into j for the start of each local row (starts with zero) 3564 . j - the column indices for each local row (starts with zero) 3565 - v - optional values in the matrix 3566 3567 Level: developer 3568 3569 Notes: 3570 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3571 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3572 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3573 3574 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3575 3576 The format which is used for the sparse matrix input, is equivalent to a 3577 row-major ordering.. i.e for the following matrix, the input data expected is 3578 as shown: 3579 3580 1 0 0 3581 2 0 3 P0 3582 ------- 3583 4 5 6 P1 3584 3585 Process0 [P0]: rows_owned=[0,1] 3586 i = {0,1,3} [size = nrow+1 = 2+1] 3587 j = {0,0,2} [size = nz = 6] 3588 v = {1,2,3} [size = nz = 6] 3589 3590 Process1 [P1]: rows_owned=[2] 3591 i = {0,3} [size = nrow+1 = 1+1] 3592 j = {0,1,2} [size = nz = 6] 3593 v = {4,5,6} [size = nz = 6] 3594 3595 .keywords: matrix, aij, compressed row, sparse, parallel 3596 3597 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3598 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3599 @*/ 3600 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3601 { 3602 PetscErrorCode ierr; 3603 3604 PetscFunctionBegin; 3605 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3606 PetscFunctionReturn(0); 3607 } 3608 3609 #undef __FUNCT__ 3610 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3611 /*@C 3612 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3613 (the default parallel PETSc format). For good matrix assembly performance 3614 the user should preallocate the matrix storage by setting the parameters 3615 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3616 performance can be increased by more than a factor of 50. 3617 3618 Collective on MPI_Comm 3619 3620 Input Parameters: 3621 + A - the matrix 3622 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3623 (same value is used for all local rows) 3624 . d_nnz - array containing the number of nonzeros in the various rows of the 3625 DIAGONAL portion of the local submatrix (possibly different for each row) 3626 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3627 The size of this array is equal to the number of local rows, i.e 'm'. 3628 You must leave room for the diagonal entry even if it is zero. 3629 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3630 submatrix (same value is used for all local rows). 3631 - o_nnz - array containing the number of nonzeros in the various rows of the 3632 OFF-DIAGONAL portion of the local submatrix (possibly different for 3633 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3634 structure. The size of this array is equal to the number 3635 of local rows, i.e 'm'. 3636 3637 If the *_nnz parameter is given then the *_nz parameter is ignored 3638 3639 The AIJ format (also called the Yale sparse matrix format or 3640 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3641 storage. The stored row and column indices begin with zero. 3642 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3643 3644 The parallel matrix is partitioned such that the first m0 rows belong to 3645 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3646 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3647 3648 The DIAGONAL portion of the local submatrix of a processor can be defined 3649 as the submatrix which is obtained by extraction the part corresponding to 3650 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3651 first row that belongs to the processor, r2 is the last row belonging to 3652 the this processor, and c1-c2 is range of indices of the local part of a 3653 vector suitable for applying the matrix to. This is an mxn matrix. In the 3654 common case of a square matrix, the row and column ranges are the same and 3655 the DIAGONAL part is also square. The remaining portion of the local 3656 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3657 3658 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3659 3660 You can call MatGetInfo() to get information on how effective the preallocation was; 3661 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3662 You can also run with the option -info and look for messages with the string 3663 malloc in them to see if additional memory allocation was needed. 3664 3665 Example usage: 3666 3667 Consider the following 8x8 matrix with 34 non-zero values, that is 3668 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3669 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3670 as follows: 3671 3672 .vb 3673 1 2 0 | 0 3 0 | 0 4 3674 Proc0 0 5 6 | 7 0 0 | 8 0 3675 9 0 10 | 11 0 0 | 12 0 3676 ------------------------------------- 3677 13 0 14 | 15 16 17 | 0 0 3678 Proc1 0 18 0 | 19 20 21 | 0 0 3679 0 0 0 | 22 23 0 | 24 0 3680 ------------------------------------- 3681 Proc2 25 26 27 | 0 0 28 | 29 0 3682 30 0 0 | 31 32 33 | 0 34 3683 .ve 3684 3685 This can be represented as a collection of submatrices as: 3686 3687 .vb 3688 A B C 3689 D E F 3690 G H I 3691 .ve 3692 3693 Where the submatrices A,B,C are owned by proc0, D,E,F are 3694 owned by proc1, G,H,I are owned by proc2. 3695 3696 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3697 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3698 The 'M','N' parameters are 8,8, and have the same values on all procs. 3699 3700 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3701 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3702 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3703 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3704 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3705 matrix, ans [DF] as another SeqAIJ matrix. 3706 3707 When d_nz, o_nz parameters are specified, d_nz storage elements are 3708 allocated for every row of the local diagonal submatrix, and o_nz 3709 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3710 One way to choose d_nz and o_nz is to use the max nonzerors per local 3711 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3712 In this case, the values of d_nz,o_nz are: 3713 .vb 3714 proc0 : dnz = 2, o_nz = 2 3715 proc1 : dnz = 3, o_nz = 2 3716 proc2 : dnz = 1, o_nz = 4 3717 .ve 3718 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3719 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3720 for proc3. i.e we are using 12+15+10=37 storage locations to store 3721 34 values. 3722 3723 When d_nnz, o_nnz parameters are specified, the storage is specified 3724 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3725 In the above case the values for d_nnz,o_nnz are: 3726 .vb 3727 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3728 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3729 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3730 .ve 3731 Here the space allocated is sum of all the above values i.e 34, and 3732 hence pre-allocation is perfect. 3733 3734 Level: intermediate 3735 3736 .keywords: matrix, aij, compressed row, sparse, parallel 3737 3738 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3739 MPIAIJ, MatGetInfo() 3740 @*/ 3741 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3742 { 3743 PetscErrorCode ierr; 3744 3745 PetscFunctionBegin; 3746 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3747 PetscFunctionReturn(0); 3748 } 3749 3750 #undef __FUNCT__ 3751 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3752 /*@ 3753 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3754 CSR format the local rows. 3755 3756 Collective on MPI_Comm 3757 3758 Input Parameters: 3759 + comm - MPI communicator 3760 . m - number of local rows (Cannot be PETSC_DECIDE) 3761 . n - This value should be the same as the local size used in creating the 3762 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3763 calculated if N is given) For square matrices n is almost always m. 3764 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3765 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3766 . i - row indices 3767 . j - column indices 3768 - a - matrix values 3769 3770 Output Parameter: 3771 . mat - the matrix 3772 3773 Level: intermediate 3774 3775 Notes: 3776 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3777 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3778 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3779 3780 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
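   A minimal call sketch (illustrative only; m is the number of locally owned rows, N the
   global number of columns, and i, j, a are the local CSR arrays laid out as in the example
   below):
.vb
      Mat A;
      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DETERMINE,N,i,j,a,&A);CHKERRQ(ierr);
.ve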
3781 3782 The format which is used for the sparse matrix input, is equivalent to a 3783 row-major ordering.. i.e for the following matrix, the input data expected is 3784 as shown: 3785 3786 1 0 0 3787 2 0 3 P0 3788 ------- 3789 4 5 6 P1 3790 3791 Process0 [P0]: rows_owned=[0,1] 3792 i = {0,1,3} [size = nrow+1 = 2+1] 3793 j = {0,0,2} [size = nz = 6] 3794 v = {1,2,3} [size = nz = 6] 3795 3796 Process1 [P1]: rows_owned=[2] 3797 i = {0,3} [size = nrow+1 = 1+1] 3798 j = {0,1,2} [size = nz = 6] 3799 v = {4,5,6} [size = nz = 6] 3800 3801 .keywords: matrix, aij, compressed row, sparse, parallel 3802 3803 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3804 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3805 @*/ 3806 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3807 { 3808 PetscErrorCode ierr; 3809 3810 PetscFunctionBegin; 3811 if (i[0]) { 3812 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3813 } 3814 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3815 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3816 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3817 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3818 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3819 PetscFunctionReturn(0); 3820 } 3821 3822 #undef __FUNCT__ 3823 #define __FUNCT__ "MatCreateMPIAIJ" 3824 /*@C 3825 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3826 (the default parallel PETSc format). For good matrix assembly performance 3827 the user should preallocate the matrix storage by setting the parameters 3828 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3829 performance can be increased by more than a factor of 50. 3830 3831 Collective on MPI_Comm 3832 3833 Input Parameters: 3834 + comm - MPI communicator 3835 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3836 This value should be the same as the local size used in creating the 3837 y vector for the matrix-vector product y = Ax. 3838 . n - This value should be the same as the local size used in creating the 3839 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3840 calculated if N is given) For square matrices n is almost always m. 3841 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3842 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3843 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3844 (same value is used for all local rows) 3845 . d_nnz - array containing the number of nonzeros in the various rows of the 3846 DIAGONAL portion of the local submatrix (possibly different for each row) 3847 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3848 The size of this array is equal to the number of local rows, i.e 'm'. 3849 You must leave room for the diagonal entry even if it is zero. 3850 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3851 submatrix (same value is used for all local rows). 
3852 - o_nnz - array containing the number of nonzeros in the various rows of the 3853 OFF-DIAGONAL portion of the local submatrix (possibly different for 3854 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3855 structure. The size of this array is equal to the number 3856 of local rows, i.e 'm'. 3857 3858 Output Parameter: 3859 . A - the matrix 3860 3861 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3862 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3863 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3864 3865 Notes: 3866 If the *_nnz parameter is given then the *_nz parameter is ignored 3867 3868 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3869 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3870 storage requirements for this matrix. 3871 3872 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3873 processor than it must be used on all processors that share the object for 3874 that argument. 3875 3876 The user MUST specify either the local or global matrix dimensions 3877 (possibly both). 3878 3879 The parallel matrix is partitioned across processors such that the 3880 first m0 rows belong to process 0, the next m1 rows belong to 3881 process 1, the next m2 rows belong to process 2 etc.. where 3882 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3883 values corresponding to [m x N] submatrix. 3884 3885 The columns are logically partitioned with the n0 columns belonging 3886 to 0th partition, the next n1 columns belonging to the next 3887 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3888 3889 The DIAGONAL portion of the local submatrix on any given processor 3890 is the submatrix corresponding to the rows and columns m,n 3891 corresponding to the given processor. i.e diagonal matrix on 3892 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3893 etc. The remaining portion of the local submatrix [m x (N-n)] 3894 constitute the OFF-DIAGONAL portion. The example below better 3895 illustrates this concept. 3896 3897 For a square global matrix we define each processor's diagonal portion 3898 to be its local rows and the corresponding columns (a square submatrix); 3899 each processor's off-diagonal portion encompasses the remainder of the 3900 local matrix (a rectangular submatrix). 3901 3902 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3903 3904 When calling this routine with a single process communicator, a matrix of 3905 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3906 type of communicator, use the construction mechanism: 3907 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3908 3909 By default, this format uses inodes (identical nodes) when possible. 3910 We search for consecutive rows with the same nonzero structure, thereby 3911 reusing matrix information to achieve increased efficiency. 3912 3913 Options Database Keys: 3914 + -mat_no_inode - Do not use inodes 3915 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3916 - -mat_aij_oneindex - Internally use indexing starting at 1 3917 rather than 0. Note that when calling MatSetValues(), 3918 the user still MUST index entries starting at 0! 3919 3920 3921 Example usage: 3922 3923 Consider the following 8x8 matrix with 34 non-zero values, that is 3924 assembled across 3 processors. 
Lets assume that proc0 owns 3 rows, 3925 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3926 as follows: 3927 3928 .vb 3929 1 2 0 | 0 3 0 | 0 4 3930 Proc0 0 5 6 | 7 0 0 | 8 0 3931 9 0 10 | 11 0 0 | 12 0 3932 ------------------------------------- 3933 13 0 14 | 15 16 17 | 0 0 3934 Proc1 0 18 0 | 19 20 21 | 0 0 3935 0 0 0 | 22 23 0 | 24 0 3936 ------------------------------------- 3937 Proc2 25 26 27 | 0 0 28 | 29 0 3938 30 0 0 | 31 32 33 | 0 34 3939 .ve 3940 3941 This can be represented as a collection of submatrices as: 3942 3943 .vb 3944 A B C 3945 D E F 3946 G H I 3947 .ve 3948 3949 Where the submatrices A,B,C are owned by proc0, D,E,F are 3950 owned by proc1, G,H,I are owned by proc2. 3951 3952 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3953 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3954 The 'M','N' parameters are 8,8, and have the same values on all procs. 3955 3956 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3957 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3958 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3959 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3960 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3961 matrix, ans [DF] as another SeqAIJ matrix. 3962 3963 When d_nz, o_nz parameters are specified, d_nz storage elements are 3964 allocated for every row of the local diagonal submatrix, and o_nz 3965 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3966 One way to choose d_nz and o_nz is to use the max nonzerors per local 3967 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3968 In this case, the values of d_nz,o_nz are: 3969 .vb 3970 proc0 : dnz = 2, o_nz = 2 3971 proc1 : dnz = 3, o_nz = 2 3972 proc2 : dnz = 1, o_nz = 4 3973 .ve 3974 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3975 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3976 for proc3. i.e we are using 12+15+10=37 storage locations to store 3977 34 values. 3978 3979 When d_nnz, o_nnz parameters are specified, the storage is specified 3980 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3981 In the above case the values for d_nnz,o_nnz are: 3982 .vb 3983 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3984 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3985 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3986 .ve 3987 Here the space allocated is sum of all the above values i.e 34, and 3988 hence pre-allocation is perfect. 
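   The recommended MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() construction sequence
   mentioned above, written out as a sketch (comm, m, n, M, N are the usual sizes and d_nnz,
   o_nnz are per-row arrays such as the ones in the example):
.vb
      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve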
3989 3990 Level: intermediate 3991 3992 .keywords: matrix, aij, compressed row, sparse, parallel 3993 3994 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3995 MPIAIJ, MatCreateMPIAIJWithArrays() 3996 @*/ 3997 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3998 { 3999 PetscErrorCode ierr; 4000 PetscMPIInt size; 4001 4002 PetscFunctionBegin; 4003 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4004 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4005 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4006 if (size > 1) { 4007 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4008 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4009 } else { 4010 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4011 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4012 } 4013 PetscFunctionReturn(0); 4014 } 4015 4016 #undef __FUNCT__ 4017 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4018 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4019 { 4020 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4021 4022 PetscFunctionBegin; 4023 *Ad = a->A; 4024 *Ao = a->B; 4025 *colmap = a->garray; 4026 PetscFunctionReturn(0); 4027 } 4028 4029 #undef __FUNCT__ 4030 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4031 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4032 { 4033 PetscErrorCode ierr; 4034 PetscInt i; 4035 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4036 4037 PetscFunctionBegin; 4038 if (coloring->ctype == IS_COLORING_GLOBAL) { 4039 ISColoringValue *allcolors,*colors; 4040 ISColoring ocoloring; 4041 4042 /* set coloring for diagonal portion */ 4043 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4044 4045 /* set coloring for off-diagonal portion */ 4046 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4047 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4048 for (i=0; i<a->B->cmap->n; i++) { 4049 colors[i] = allcolors[a->garray[i]]; 4050 } 4051 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4052 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4053 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4054 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4055 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4056 ISColoringValue *colors; 4057 PetscInt *larray; 4058 ISColoring ocoloring; 4059 4060 /* set coloring for diagonal portion */ 4061 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4062 for (i=0; i<a->A->cmap->n; i++) { 4063 larray[i] = i + A->cmap->rstart; 4064 } 4065 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4066 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4067 for (i=0; i<a->A->cmap->n; i++) { 4068 colors[i] = coloring->colors[larray[i]]; 4069 } 4070 ierr = PetscFree(larray);CHKERRQ(ierr); 4071 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4072 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4073 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4074 4075 /* set coloring for off-diagonal portion */ 4076 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4077 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4078 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4079 for (i=0; i<a->B->cmap->n; i++) { 4080 colors[i] = coloring->colors[larray[i]]; 4081 } 4082 ierr = PetscFree(larray);CHKERRQ(ierr); 4083 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4084 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4085 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 4086 } else { 4087 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4088 } 4089 4090 PetscFunctionReturn(0); 4091 } 4092 4093 #if defined(PETSC_HAVE_ADIC) 4094 #undef __FUNCT__ 4095 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4096 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4097 { 4098 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4099 PetscErrorCode ierr; 4100 4101 PetscFunctionBegin; 4102 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4103 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4104 PetscFunctionReturn(0); 4105 } 4106 #endif 4107 4108 #undef __FUNCT__ 4109 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4110 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4111 { 4112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4113 PetscErrorCode ierr; 4114 4115 PetscFunctionBegin; 4116 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4117 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4118 PetscFunctionReturn(0); 4119 } 4120 4121 #undef __FUNCT__ 4122 #define __FUNCT__ "MatMerge" 4123 /*@ 4124 MatMerge - Creates a single large PETSc matrix by concatinating sequential 4125 matrices from each processor 4126 4127 Collective on MPI_Comm 4128 4129 Input Parameters: 4130 + comm - the communicators the parallel matrix will live on 4131 . inmat - the input sequential matrices 4132 . n - number of local columns (or PETSC_DECIDE) 4133 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4134 4135 Output Parameter: 4136 . outmat - the parallel matrix generated 4137 4138 Level: advanced 4139 4140 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
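   Note that this routine calls MatDestroy() on inmat once its values have been moved into
   outmat, so the caller should not destroy inmat again.

   A minimal call sketch (illustrative only), where seqA is the local sequential matrix and A
   receives the resulting parallel matrix:
.vb
      ierr = MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
.ve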
4141 4142 @*/ 4143 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4144 { 4145 PetscErrorCode ierr; 4146 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 4147 PetscInt *indx; 4148 PetscScalar *values; 4149 4150 PetscFunctionBegin; 4151 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4152 if (scall == MAT_INITIAL_MATRIX){ 4153 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 4154 if (n == PETSC_DECIDE){ 4155 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4156 } 4157 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4158 rstart -= m; 4159 4160 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4161 for (i=0;i<m;i++) { 4162 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4163 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4164 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4165 } 4166 /* This routine will ONLY return MPIAIJ type matrix */ 4167 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4168 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4169 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4170 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4171 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4172 4173 } else if (scall == MAT_REUSE_MATRIX){ 4174 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4175 } else { 4176 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4177 } 4178 4179 for (i=0;i<m;i++) { 4180 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4181 Ii = i + rstart; 4182 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4183 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4184 } 4185 ierr = MatDestroy(inmat);CHKERRQ(ierr); 4186 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4187 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4188 4189 PetscFunctionReturn(0); 4190 } 4191 4192 #undef __FUNCT__ 4193 #define __FUNCT__ "MatFileSplit" 4194 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4195 { 4196 PetscErrorCode ierr; 4197 PetscMPIInt rank; 4198 PetscInt m,N,i,rstart,nnz; 4199 size_t len; 4200 const PetscInt *indx; 4201 PetscViewer out; 4202 char *name; 4203 Mat B; 4204 const PetscScalar *values; 4205 4206 PetscFunctionBegin; 4207 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4208 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4209 /* Should this be the type of the diagonal block of A? 
*/ 4210 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4211 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4212 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4213 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4214 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4215 for (i=0;i<m;i++) { 4216 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4217 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4218 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4219 } 4220 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4221 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4222 4223 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4224 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4225 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4226 sprintf(name,"%s.%d",outfile,rank); 4227 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4228 ierr = PetscFree(name); 4229 ierr = MatView(B,out);CHKERRQ(ierr); 4230 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 4231 ierr = MatDestroy(B);CHKERRQ(ierr); 4232 PetscFunctionReturn(0); 4233 } 4234 4235 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 4236 #undef __FUNCT__ 4237 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4238 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4239 { 4240 PetscErrorCode ierr; 4241 Mat_Merge_SeqsToMPI *merge; 4242 PetscContainer container; 4243 4244 PetscFunctionBegin; 4245 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4246 if (container) { 4247 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4248 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4249 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4250 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4251 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4252 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4253 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4254 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4255 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4256 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4257 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4258 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4259 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4260 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4261 4262 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4263 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4264 } 4265 ierr = PetscFree(merge);CHKERRQ(ierr); 4266 4267 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4268 PetscFunctionReturn(0); 4269 } 4270 4271 #include "../src/mat/utils/freespace.h" 4272 #include "petscbt.h" 4273 4274 #undef __FUNCT__ 4275 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4276 /*@C 4277 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4278 matrices from each processor 4279 4280 Collective on MPI_Comm 4281 4282 Input Parameters: 4283 + comm - the communicators the parallel matrix will live on 4284 . seqmat - the input sequential matrices 4285 . m - number of local rows (or PETSC_DECIDE) 4286 . n - number of local columns (or PETSC_DECIDE) 4287 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4288 4289 Output Parameter: 4290 . mpimat - the parallel matrix generated 4291 4292 Level: advanced 4293 4294 Notes: 4295 The dimensions of the sequential matrix in each processor MUST be the same. 
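   This numeric routine fills in the values of mpimat using the nonzero structure and
   communication pattern stored in the "MatMergeSeqsToMPI" container by
   MatMerge_SeqsToMPISymbolic(), which therefore must have been used to create mpimat first.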
4296 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4297 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4298 @*/ 4299 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4300 { 4301 PetscErrorCode ierr; 4302 MPI_Comm comm=((PetscObject)mpimat)->comm; 4303 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4304 PetscMPIInt size,rank,taga,*len_s; 4305 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4306 PetscInt proc,m; 4307 PetscInt **buf_ri,**buf_rj; 4308 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4309 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4310 MPI_Request *s_waits,*r_waits; 4311 MPI_Status *status; 4312 MatScalar *aa=a->a; 4313 MatScalar **abuf_r,*ba_i; 4314 Mat_Merge_SeqsToMPI *merge; 4315 PetscContainer container; 4316 4317 PetscFunctionBegin; 4318 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4319 4320 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4321 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4322 4323 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4324 if (container) { 4325 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4326 } 4327 bi = merge->bi; 4328 bj = merge->bj; 4329 buf_ri = merge->buf_ri; 4330 buf_rj = merge->buf_rj; 4331 4332 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4333 owners = merge->rowmap->range; 4334 len_s = merge->len_s; 4335 4336 /* send and recv matrix values */ 4337 /*-----------------------------*/ 4338 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4339 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4340 4341 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4342 for (proc=0,k=0; proc<size; proc++){ 4343 if (!len_s[proc]) continue; 4344 i = owners[proc]; 4345 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4346 k++; 4347 } 4348 4349 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4350 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4351 ierr = PetscFree(status);CHKERRQ(ierr); 4352 4353 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4354 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4355 4356 /* insert mat values of mpimat */ 4357 /*----------------------------*/ 4358 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4359 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4360 4361 for (k=0; k<merge->nrecv; k++){ 4362 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4363 nrows = *(buf_ri_k[k]); 4364 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4365 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4366 } 4367 4368 /* set values of ba */ 4369 m = merge->rowmap->n; 4370 for (i=0; i<m; i++) { 4371 arow = owners[rank] + i; 4372 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4373 bnzi = bi[i+1] - bi[i]; 4374 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4375 4376 /* add local non-zero vals of this proc's seqmat into ba */ 4377 anzi = ai[arow+1] - ai[arow]; 4378 aj = a->j + ai[arow]; 4379 aa = a->a + ai[arow]; 4380 nextaj = 0; 4381 for (j=0; nextaj<anzi; j++){ 4382 if (*(bj_i 
+ j) == aj[nextaj]){ /* bcol == acol */ 4383 ba_i[j] += aa[nextaj++]; 4384 } 4385 } 4386 4387 /* add received vals into ba */ 4388 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4389 /* i-th row */ 4390 if (i == *nextrow[k]) { 4391 anzi = *(nextai[k]+1) - *nextai[k]; 4392 aj = buf_rj[k] + *(nextai[k]); 4393 aa = abuf_r[k] + *(nextai[k]); 4394 nextaj = 0; 4395 for (j=0; nextaj<anzi; j++){ 4396 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4397 ba_i[j] += aa[nextaj++]; 4398 } 4399 } 4400 nextrow[k]++; nextai[k]++; 4401 } 4402 } 4403 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4404 } 4405 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4406 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4407 4408 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4409 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4410 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4411 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4412 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4413 PetscFunctionReturn(0); 4414 } 4415 4416 #undef __FUNCT__ 4417 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4418 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4419 { 4420 PetscErrorCode ierr; 4421 Mat B_mpi; 4422 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4423 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4424 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4425 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4426 PetscInt len,proc,*dnz,*onz; 4427 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4428 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4429 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4430 MPI_Status *status; 4431 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4432 PetscBT lnkbt; 4433 Mat_Merge_SeqsToMPI *merge; 4434 PetscContainer container; 4435 4436 PetscFunctionBegin; 4437 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4438 4439 /* make sure it is a PETSc comm */ 4440 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4441 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4442 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4443 4444 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4445 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4446 4447 /* determine row ownership */ 4448 /*---------------------------------------------------------*/ 4449 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4450 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4451 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4452 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4453 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4454 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4455 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4456 4457 m = merge->rowmap->n; 4458 M = merge->rowmap->N; 4459 owners = merge->rowmap->range; 4460 4461 /* determine the number of messages to send, their lengths */ 4462 /*---------------------------------------------------------*/ 4463 len_s = merge->len_s; 4464 4465 len = 0; /* length of buf_si[] */ 4466 merge->nsend = 0; 4467 for (proc=0; proc<size; proc++){ 4468 len_si[proc] = 0; 4469 if (proc == rank){ 4470 len_s[proc] = 0; 4471 } else { 4472 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4473 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4474 } 4475 if (len_s[proc]) { 4476 merge->nsend++; 4477 nrows = 0; 4478 for (i=owners[proc]; i<owners[proc+1]; i++){ 4479 if (ai[i+1] > ai[i]) nrows++; 4480 } 4481 len_si[proc] = 2*(nrows+1); 4482 len += len_si[proc]; 4483 } 4484 } 4485 4486 /* determine the number and length of messages to receive for ij-structure */ 4487 /*-------------------------------------------------------------------------*/ 4488 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4489 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4490 4491 /* post the Irecv of j-structure */ 4492 /*-------------------------------*/ 4493 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4494 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4495 4496 /* post the Isend of j-structure */ 4497 /*--------------------------------*/ 4498 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4499 4500 for (proc=0, k=0; proc<size; proc++){ 4501 if (!len_s[proc]) continue; 4502 i = owners[proc]; 4503 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4504 k++; 4505 } 4506 4507 /* receives and sends of j-structure are complete */ 4508 /*------------------------------------------------*/ 4509 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4510 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4511 4512 /* send and recv i-structure */ 4513 /*---------------------------*/ 4514 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4515 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4516 4517 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4518 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4519 for (proc=0,k=0; proc<size; proc++){ 4520 if (!len_s[proc]) continue; 4521 /* form outgoing message for i-structure: 4522 buf_si[0]: nrows to be sent 4523 [1:nrows]: row index (global) 4524 [nrows+1:2*nrows+1]: i-structure index 4525 */ 4526 /*-------------------------------------------*/ 4527 nrows = len_si[proc]/2 - 1; 4528 buf_si_i = buf_si + nrows+1; 4529 buf_si[0] = nrows; 4530 buf_si_i[0] = 0; 4531 nrows = 0; 4532 for (i=owners[proc]; i<owners[proc+1]; i++){ 4533 anzi = ai[i+1] - ai[i]; 4534 if (anzi) { 4535 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4536 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4537 nrows++; 4538 } 4539 } 4540 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4541 k++; 4542 buf_si += len_si[proc]; 4543 } 4544 4545 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4546 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4547 4548 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4549 for (i=0; i<merge->nrecv; i++){ 4550 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4551 } 4552 4553 ierr = PetscFree(len_si);CHKERRQ(ierr); 4554 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4555 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4556 ierr = 
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = 0;
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr =
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4642 4643 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4644 B_mpi->assembled = PETSC_FALSE; 4645 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4646 merge->bi = bi; 4647 merge->bj = bj; 4648 merge->buf_ri = buf_ri; 4649 merge->buf_rj = buf_rj; 4650 merge->coi = PETSC_NULL; 4651 merge->coj = PETSC_NULL; 4652 merge->owners_co = PETSC_NULL; 4653 4654 /* attach the supporting struct to B_mpi for reuse */ 4655 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4656 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4657 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4658 *mpimat = B_mpi; 4659 4660 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4661 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4662 PetscFunctionReturn(0); 4663 } 4664 4665 #undef __FUNCT__ 4666 #define __FUNCT__ "MatMerge_SeqsToMPI" 4667 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4668 { 4669 PetscErrorCode ierr; 4670 4671 PetscFunctionBegin; 4672 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4673 if (scall == MAT_INITIAL_MATRIX){ 4674 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4675 } 4676 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4677 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4678 PetscFunctionReturn(0); 4679 } 4680 4681 #undef __FUNCT__ 4682 #define __FUNCT__ "MatGetLocalMat" 4683 /*@ 4684 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4685 4686 Not Collective 4687 4688 Input Parameters: 4689 + A - the matrix 4690 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4691 4692 Output Parameter: 4693 . 
A_loc - the local sequential matrix generated 4694 4695 Level: developer 4696 4697 @*/ 4698 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4699 { 4700 PetscErrorCode ierr; 4701 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4702 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4703 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4704 MatScalar *aa=a->a,*ba=b->a,*cam; 4705 PetscScalar *ca; 4706 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4707 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4708 4709 PetscFunctionBegin; 4710 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4711 if (scall == MAT_INITIAL_MATRIX){ 4712 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4713 ci[0] = 0; 4714 for (i=0; i<am; i++){ 4715 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4716 } 4717 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4718 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4719 k = 0; 4720 for (i=0; i<am; i++) { 4721 ncols_o = bi[i+1] - bi[i]; 4722 ncols_d = ai[i+1] - ai[i]; 4723 /* off-diagonal portion of A */ 4724 for (jo=0; jo<ncols_o; jo++) { 4725 col = cmap[*bj]; 4726 if (col >= cstart) break; 4727 cj[k] = col; bj++; 4728 ca[k++] = *ba++; 4729 } 4730 /* diagonal portion of A */ 4731 for (j=0; j<ncols_d; j++) { 4732 cj[k] = cstart + *aj++; 4733 ca[k++] = *aa++; 4734 } 4735 /* off-diagonal portion of A */ 4736 for (j=jo; j<ncols_o; j++) { 4737 cj[k] = cmap[*bj++]; 4738 ca[k++] = *ba++; 4739 } 4740 } 4741 /* put together the new matrix */ 4742 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4743 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4744 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4745 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4746 mat->free_a = PETSC_TRUE; 4747 mat->free_ij = PETSC_TRUE; 4748 mat->nonew = 0; 4749 } else if (scall == MAT_REUSE_MATRIX){ 4750 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4751 ci = mat->i; cj = mat->j; cam = mat->a; 4752 for (i=0; i<am; i++) { 4753 /* off-diagonal portion of A */ 4754 ncols_o = bi[i+1] - bi[i]; 4755 for (jo=0; jo<ncols_o; jo++) { 4756 col = cmap[*bj]; 4757 if (col >= cstart) break; 4758 *cam++ = *ba++; bj++; 4759 } 4760 /* diagonal portion of A */ 4761 ncols_d = ai[i+1] - ai[i]; 4762 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4763 /* off-diagonal portion of A */ 4764 for (j=jo; j<ncols_o; j++) { 4765 *cam++ = *ba++; bj++; 4766 } 4767 } 4768 } else { 4769 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4770 } 4771 4772 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4773 PetscFunctionReturn(0); 4774 } 4775 4776 #undef __FUNCT__ 4777 #define __FUNCT__ "MatGetLocalMatCondensed" 4778 /*@C 4779 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4780 4781 Not Collective 4782 4783 Input Parameters: 4784 + A - the matrix 4785 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4786 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4787 4788 Output Parameter: 4789 . 
A_loc - the local sequential matrix generated 4790 4791 Level: developer 4792 4793 @*/ 4794 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4795 { 4796 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4797 PetscErrorCode ierr; 4798 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4799 IS isrowa,iscola; 4800 Mat *aloc; 4801 4802 PetscFunctionBegin; 4803 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4804 if (!row){ 4805 start = A->rmap->rstart; end = A->rmap->rend; 4806 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4807 } else { 4808 isrowa = *row; 4809 } 4810 if (!col){ 4811 start = A->cmap->rstart; 4812 cmap = a->garray; 4813 nzA = a->A->cmap->n; 4814 nzB = a->B->cmap->n; 4815 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4816 ncols = 0; 4817 for (i=0; i<nzB; i++) { 4818 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4819 else break; 4820 } 4821 imark = i; 4822 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4823 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4824 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4825 } else { 4826 iscola = *col; 4827 } 4828 if (scall != MAT_INITIAL_MATRIX){ 4829 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4830 aloc[0] = *A_loc; 4831 } 4832 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4833 *A_loc = aloc[0]; 4834 ierr = PetscFree(aloc);CHKERRQ(ierr); 4835 if (!row){ 4836 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4837 } 4838 if (!col){ 4839 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4840 } 4841 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4842 PetscFunctionReturn(0); 4843 } 4844 4845 #undef __FUNCT__ 4846 #define __FUNCT__ "MatGetBrowsOfAcols" 4847 /*@C 4848 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4849 4850 Collective on Mat 4851 4852 Input Parameters: 4853 + A,B - the matrices in mpiaij format 4854 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4855 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4856 4857 Output Parameter: 4858 + rowb, colb - index sets of rows and columns of B to extract 4859 . 
brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4860 - B_seq - the sequential matrix generated 4861 4862 Level: developer 4863 4864 @*/ 4865 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4866 { 4867 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4868 PetscErrorCode ierr; 4869 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4870 IS isrowb,iscolb; 4871 Mat *bseq; 4872 4873 PetscFunctionBegin; 4874 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4875 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4876 } 4877 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4878 4879 if (scall == MAT_INITIAL_MATRIX){ 4880 start = A->cmap->rstart; 4881 cmap = a->garray; 4882 nzA = a->A->cmap->n; 4883 nzB = a->B->cmap->n; 4884 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4885 ncols = 0; 4886 for (i=0; i<nzB; i++) { /* row < local row index */ 4887 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4888 else break; 4889 } 4890 imark = i; 4891 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4892 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4893 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4894 *brstart = imark; 4895 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4896 } else { 4897 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4898 isrowb = *rowb; iscolb = *colb; 4899 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4900 bseq[0] = *B_seq; 4901 } 4902 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4903 *B_seq = bseq[0]; 4904 ierr = PetscFree(bseq);CHKERRQ(ierr); 4905 if (!rowb){ 4906 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4907 } else { 4908 *rowb = isrowb; 4909 } 4910 if (!colb){ 4911 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4912 } else { 4913 *colb = iscolb; 4914 } 4915 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4916 PetscFunctionReturn(0); 4917 } 4918 4919 #undef __FUNCT__ 4920 #define __FUNCT__ "MatGetBrowsOfAoCols" 4921 /*@C 4922 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4923 of the OFF-DIAGONAL portion of local A 4924 4925 Collective on Mat 4926 4927 Input Parameters: 4928 + A,B - the matrices in mpiaij format 4929 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4930 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4931 . 
startsj_r - similar to startsj for receives 4932 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4933 4934 Output Parameter: 4935 + B_oth - the sequential matrix generated 4936 4937 Level: developer 4938 4939 @*/ 4940 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4941 { 4942 VecScatter_MPI_General *gen_to,*gen_from; 4943 PetscErrorCode ierr; 4944 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4945 Mat_SeqAIJ *b_oth; 4946 VecScatter ctx=a->Mvctx; 4947 MPI_Comm comm=((PetscObject)ctx)->comm; 4948 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4949 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4950 PetscScalar *rvalues,*svalues; 4951 MatScalar *b_otha,*bufa,*bufA; 4952 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4953 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4954 MPI_Status *sstatus,rstatus; 4955 PetscMPIInt jj; 4956 PetscInt *cols,sbs,rbs; 4957 PetscScalar *vals; 4958 4959 PetscFunctionBegin; 4960 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4961 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4962 } 4963 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4964 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4965 4966 gen_to = (VecScatter_MPI_General*)ctx->todata; 4967 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4968 rvalues = gen_from->values; /* holds the length of receiving row */ 4969 svalues = gen_to->values; /* holds the length of sending row */ 4970 nrecvs = gen_from->n; 4971 nsends = gen_to->n; 4972 4973 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4974 srow = gen_to->indices; /* local row index to be sent */ 4975 sstarts = gen_to->starts; 4976 sprocs = gen_to->procs; 4977 sstatus = gen_to->sstatus; 4978 sbs = gen_to->bs; 4979 rstarts = gen_from->starts; 4980 rprocs = gen_from->procs; 4981 rbs = gen_from->bs; 4982 4983 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4984 if (scall == MAT_INITIAL_MATRIX){ 4985 /* i-array */ 4986 /*---------*/ 4987 /* post receives */ 4988 for (i=0; i<nrecvs; i++){ 4989 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4990 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4991 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4992 } 4993 4994 /* pack the outgoing message */ 4995 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4996 sstartsj[0] = 0; rstartsj[0] = 0; 4997 len = 0; /* total length of j or a array to be sent */ 4998 k = 0; 4999 for (i=0; i<nsends; i++){ 5000 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5001 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5002 for (j=0; j<nrows; j++) { 5003 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5004 for (l=0; l<sbs; l++){ 5005 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5006 rowlen[j*sbs+l] = ncols; 5007 len += ncols; 5008 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5009 } 5010 k++; 5011 } 5012 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5013 sstartsj[i+1] = len; /* starting point of (i+1)-th 
outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5108 } 5109 } 5110 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5111 } 5112 /* recvs and sends of a-array are completed */ 5113 i = nrecvs; 5114 while (i--) { 5115 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5116 } 5117 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5118 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5119 5120 if (scall == MAT_INITIAL_MATRIX){ 5121 /* put together the new matrix */ 5122 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5123 5124 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5125 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5126 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5127 b_oth->free_a = PETSC_TRUE; 5128 b_oth->free_ij = PETSC_TRUE; 5129 b_oth->nonew = 0; 5130 5131 ierr = PetscFree(bufj);CHKERRQ(ierr); 5132 if (!startsj || !bufa_ptr){ 5133 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5134 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5135 } else { 5136 *startsj = sstartsj; 5137 *startsj_r = rstartsj; 5138 *bufa_ptr = bufa; 5139 } 5140 } 5141 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5142 PetscFunctionReturn(0); 5143 } 5144 5145 #undef __FUNCT__ 5146 #define __FUNCT__ "MatGetCommunicationStructs" 5147 /*@C 5148 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5149 5150 Not Collective 5151 5152 Input Parameters: 5153 . A - The matrix in mpiaij format 5154 5155 Output Parameter: 5156 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5157 . 
colmap - A map from global column index to local index into lvec 5158 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5159 5160 Level: developer 5161 5162 @*/ 5163 #if defined (PETSC_USE_CTABLE) 5164 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5165 #else 5166 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5167 #endif 5168 { 5169 Mat_MPIAIJ *a; 5170 5171 PetscFunctionBegin; 5172 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5173 PetscValidPointer(lvec, 2); 5174 PetscValidPointer(colmap, 3); 5175 PetscValidPointer(multScatter, 4); 5176 a = (Mat_MPIAIJ *) A->data; 5177 if (lvec) *lvec = a->lvec; 5178 if (colmap) *colmap = a->colmap; 5179 if (multScatter) *multScatter = a->Mvctx; 5180 PetscFunctionReturn(0); 5181 } 5182 5183 EXTERN_C_BEGIN 5184 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5185 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5186 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5187 EXTERN_C_END 5188 5189 #undef __FUNCT__ 5190 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5191 /* 5192 Computes (B'*A')' since computing B*A directly is untenable 5193 5194 n p p 5195 ( ) ( ) ( ) 5196 m ( A ) * n ( B ) = m ( C ) 5197 ( ) ( ) ( ) 5198 5199 */ 5200 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5201 { 5202 PetscErrorCode ierr; 5203 Mat At,Bt,Ct; 5204 5205 PetscFunctionBegin; 5206 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5207 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5208 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5209 ierr = MatDestroy(At);CHKERRQ(ierr); 5210 ierr = MatDestroy(Bt);CHKERRQ(ierr); 5211 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5212 ierr = MatDestroy(Ct);CHKERRQ(ierr); 5213 PetscFunctionReturn(0); 5214 } 5215 5216 #undef __FUNCT__ 5217 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5218 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5219 { 5220 PetscErrorCode ierr; 5221 PetscInt m=A->rmap->n,n=B->cmap->n; 5222 Mat Cmat; 5223 5224 PetscFunctionBegin; 5225 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5226 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5227 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5228 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5229 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5230 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5231 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5232 *C = Cmat; 5233 PetscFunctionReturn(0); 5234 } 5235 5236 /* ----------------------------------------------------------------*/ 5237 #undef __FUNCT__ 5238 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5239 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5240 { 5241 PetscErrorCode ierr; 5242 5243 PetscFunctionBegin; 5244 if (scall == MAT_INITIAL_MATRIX){ 5245 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5246 } 5247 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5248 PetscFunctionReturn(0); 5249 } 
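/*
   Illustrative usage sketch only (not part of this file's implementation): the MPIDense*MPIAIJ
   product above is normally reached through the generic MatMatMult() interface once the
   routines are composed on the matrix in MatCreate_MPIAIJ() below.  A, B and C are placeholders
   for matrices the caller has already created and assembled (A of type MATMPIDENSE, B of type
   MATMPIAIJ).

      Mat            A,B,C;
      PetscErrorCode ierr;
      ...
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);    C = A*B
      ...
      ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);      reuse the structure of C
      ierr = MatDestroy(C);CHKERRQ(ierr);
*/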
5250 5251 EXTERN_C_BEGIN 5252 #if defined(PETSC_HAVE_MUMPS) 5253 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5254 #endif 5255 #if defined(PETSC_HAVE_PASTIX) 5256 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5257 #endif 5258 #if defined(PETSC_HAVE_SUPERLU_DIST) 5259 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5260 #endif 5261 #if defined(PETSC_HAVE_SPOOLES) 5262 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5263 #endif 5264 EXTERN_C_END 5265 5266 /*MC 5267 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5268 5269 Options Database Keys: 5270 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5271 5272 Level: beginner 5273 5274 .seealso: MatCreateMPIAIJ() 5275 M*/ 5276 5277 EXTERN_C_BEGIN 5278 #undef __FUNCT__ 5279 #define __FUNCT__ "MatCreate_MPIAIJ" 5280 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5281 { 5282 Mat_MPIAIJ *b; 5283 PetscErrorCode ierr; 5284 PetscMPIInt size; 5285 5286 PetscFunctionBegin; 5287 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5288 5289 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5290 B->data = (void*)b; 5291 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5292 B->rmap->bs = 1; 5293 B->assembled = PETSC_FALSE; 5294 B->mapping = 0; 5295 5296 B->insertmode = NOT_SET_VALUES; 5297 b->size = size; 5298 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5299 5300 /* build cache for off array entries formed */ 5301 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5302 b->donotstash = PETSC_FALSE; 5303 b->colmap = 0; 5304 b->garray = 0; 5305 b->roworiented = PETSC_TRUE; 5306 5307 /* stuff used for matrix vector multiply */ 5308 b->lvec = PETSC_NULL; 5309 b->Mvctx = PETSC_NULL; 5310 5311 /* stuff for MatGetRow() */ 5312 b->rowindices = 0; 5313 b->rowvalues = 0; 5314 b->getrowactive = PETSC_FALSE; 5315 5316 #if defined(PETSC_HAVE_SPOOLES) 5317 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5318 "MatGetFactor_mpiaij_spooles", 5319 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5320 #endif 5321 #if defined(PETSC_HAVE_MUMPS) 5322 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5323 "MatGetFactor_aij_mumps", 5324 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5325 #endif 5326 #if defined(PETSC_HAVE_PASTIX) 5327 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5328 "MatGetFactor_mpiaij_pastix", 5329 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5330 #endif 5331 #if defined(PETSC_HAVE_SUPERLU_DIST) 5332 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5333 "MatGetFactor_mpiaij_superlu_dist", 5334 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5335 #endif 5336 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5337 "MatStoreValues_MPIAIJ", 5338 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5339 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5340 "MatRetrieveValues_MPIAIJ", 5341 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5342 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5343 "MatGetDiagonalBlock_MPIAIJ", 5344 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5345 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5346 "MatIsTranspose_MPIAIJ", 
5347 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5348 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5349 "MatMPIAIJSetPreallocation_MPIAIJ", 5350 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5351 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5352 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5353 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5354 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5355 "MatDiagonalScaleLocal_MPIAIJ", 5356 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5357 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5358 "MatConvert_MPIAIJ_MPIAIJPERM", 5359 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5360 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5361 "MatConvert_MPIAIJ_MPIAIJCRL", 5362 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5363 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5364 "MatConvert_MPIAIJ_MPISBAIJ", 5365 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5366 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5367 "MatMatMult_MPIDense_MPIAIJ", 5368 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5369 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5370 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5371 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5372 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5373 "MatMatMultNumeric_MPIDense_MPIAIJ", 5374 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5375 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5376 PetscFunctionReturn(0); 5377 } 5378 EXTERN_C_END 5379 5380 #undef __FUNCT__ 5381 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5382 /*@ 5383 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5384 and "off-diagonal" part of the matrix in CSR format. 5385 5386 Collective on MPI_Comm 5387 5388 Input Parameters: 5389 + comm - MPI communicator 5390 . m - number of local rows (Cannot be PETSC_DECIDE) 5391 . n - This value should be the same as the local size used in creating the 5392 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5393 calculated if N is given) For square matrices n is almost always m. 5394 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5395 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5396 . i - row indices for "diagonal" portion of matrix 5397 . j - column indices 5398 . a - matrix values 5399 . oi - row indices for "off-diagonal" portion of matrix 5400 . oj - column indices 5401 - oa - matrix values 5402 5403 Output Parameter: 5404 . mat - the matrix 5405 5406 Level: advanced 5407 5408 Notes: 5409 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5410 5411 The i and j indices are 0 based 5412 5413 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5414 5415 This sets local rows and cannot be used to set off-processor values. 5416 5417 You cannot later use MatSetValues() to change values in this matrix. 
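   Example usage (a sketch only; di,dj,da and oi,oj,oa stand for the caller's CSR arrays for the
   "diagonal" and "off-diagonal" blocks described above, with comm, m and n as in MatCreateMPIAIJ()):

$      Mat A;
$      ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
$                                            di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);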
5418 5419 .keywords: matrix, aij, compressed row, sparse, parallel 5420 5421 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5422 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5423 @*/ 5424 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5425 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5426 { 5427 PetscErrorCode ierr; 5428 Mat_MPIAIJ *maij; 5429 5430 PetscFunctionBegin; 5431 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5432 if (i[0]) { 5433 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5434 } 5435 if (oi[0]) { 5436 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5437 } 5438 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5439 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5440 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5441 maij = (Mat_MPIAIJ*) (*mat)->data; 5442 maij->donotstash = PETSC_TRUE; 5443 (*mat)->preallocated = PETSC_TRUE; 5444 5445 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5446 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5447 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5448 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5449 5450 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5451 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5452 5453 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5454 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5455 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5456 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5457 5458 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5459 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5460 PetscFunctionReturn(0); 5461 } 5462 5463 /* 5464 Special version for direct calls from Fortran 5465 */ 5466 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5467 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5468 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5469 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5470 #endif 5471 5472 /* Change these macros so can be used in void function */ 5473 #undef CHKERRQ 5474 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5475 #undef SETERRQ2 5476 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5477 #undef SETERRQ 5478 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5479 5480 EXTERN_C_BEGIN 5481 #undef __FUNCT__ 5482 #define __FUNCT__ "matsetvaluesmpiaij_" 5483 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5484 { 5485 Mat mat = *mmat; 5486 PetscInt m = *mm, n = *mn; 5487 InsertMode addv = *maddv; 5488 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5489 PetscScalar value; 5490 PetscErrorCode ierr; 5491 5492 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5493 if (mat->insertmode == NOT_SET_VALUES) { 5494 mat->insertmode = addv; 5495 } 5496 #if defined(PETSC_USE_DEBUG) 5497 else if (mat->insertmode != addv) { 5498 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5499 } 5500 #endif 5501 { 5502 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5503 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5504 PetscBool roworiented = aij->roworiented; 5505 5506 /* Some Variables required in the macro */ 5507 Mat A = aij->A; 5508 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5509 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5510 MatScalar *aa = a->a; 5511 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5512 Mat B = aij->B; 5513 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5514 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5515 MatScalar *ba = b->a; 5516 5517 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5518 PetscInt nonew = a->nonew; 5519 MatScalar *ap1,*ap2; 5520 5521 PetscFunctionBegin; 5522 for (i=0; i<m; i++) { 5523 if (im[i] < 0) continue; 5524 #if defined(PETSC_USE_DEBUG) 5525 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5526 #endif 5527 if (im[i] >= rstart && im[i] < rend) { 5528 row = im[i] - rstart; 5529 lastcol1 = -1; 5530 rp1 = aj + ai[row]; 5531 ap1 = aa + ai[row]; 5532 rmax1 = aimax[row]; 5533 nrow1 = ailen[row]; 5534 low1 = 0; 5535 high1 = nrow1; 5536 lastcol2 = -1; 5537 rp2 = bj + bi[row]; 5538 ap2 = ba + bi[row]; 5539 rmax2 = bimax[row]; 5540 nrow2 = bilen[row]; 5541 low2 = 0; 5542 high2 = nrow2; 5543 5544 for (j=0; j<n; j++) { 5545 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5546 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5547 if (in[j] >= cstart && in[j] < cend){ 5548 col = in[j] - cstart; 5549 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5550 } else if (in[j] < 0) continue; 5551 #if defined(PETSC_USE_DEBUG) 5552 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5553 #endif 5554 else { 5555 if (mat->was_assembled) { 5556 if (!aij->colmap) { 5557 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5558 } 5559 #if defined (PETSC_USE_CTABLE) 5560 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5561 col--; 5562 #else 5563 col = aij->colmap[in[j]] - 1; 5564 #endif 5565 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5566 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5567 col = in[j]; 5568 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5569 B = aij->B; 5570 b = (Mat_SeqAIJ*)B->data; 5571 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5572 rp2 = bj + bi[row]; 5573 ap2 = ba + bi[row]; 5574 rmax2 = bimax[row]; 5575 nrow2 = bilen[row]; 5576 low2 = 0; 5577 high2 = nrow2; 5578 bm = aij->B->rmap->n; 5579 ba = b->a; 5580 } 5581 } else col = in[j]; 5582 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5583 } 5584 } 5585 } else { 5586 if (!aij->donotstash) { 5587 if (roworiented) { 5588 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5589 } else { 5590 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5591 } 5592 } 5593 } 5594 }} 5595 PetscFunctionReturnVoid(); 5596 } 5597 EXTERN_C_END 5598 5599
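/*
   Illustrative assembly sketch (placeholders only; comm, sizes, stencil and values are
   application specific): the standard way to build a MATMPIAIJ matrix, exercising the
   MatSetValues()/stash/assembly machinery implemented in this file.

      Mat            A;
      PetscInt       i,rstart,rend,N = 100;
      PetscScalar    v = 1.0;
      PetscErrorCode ierr;

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (i=rstart; i<rend; i++) {
        ierr = MatSetValues(A,1,&i,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/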