#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

  Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal nonzeros per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal nonzeros per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
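/*
   Usage sketch (added for exposition, not part of the library source): distributing a
   sequential AIJ matrix that is significant only on process 0 over a communicator.
   The names gseq, dist and mlocal are placeholders chosen by the caller.

     Mat      gseq;     sequential matrix, fully assembled on rank 0
     Mat      dist;     resulting parallel matrix
     PetscInt mlocal;   number of rows this process is to own

     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
        ... use dist ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);

   With MAT_REUSE_MATRIX only the numerical values are moved from process 0; the
   nonzero structure set up by the first call is kept.
*/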
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order-N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
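/*
   Note on the two macros above (added for exposition): both perform the same insertion
   into a single compressed row; the A variant works on the diagonal block, the B variant
   on the off-diagonal block.  As a small worked example, suppose a row currently holds
   column indices rp1[] = {2, 7, 9} with nrow1 = 3 and col = 8 is inserted:

     1) the binary search narrows [low1,high1) until at most five candidates remain,
     2) the linear scan finds the first slot _i with rp1[_i] > col (here _i = 2),
     3) entries _i..nrow1-1 are shifted up one slot and (col,value) is stored at _i,

   so the row stays sorted by column, becoming {2, 7, 8, 9}.  With ADD_VALUES an existing
   entry is accumulated into instead of inserted.
*/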
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
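/*
   Usage sketch (added for exposition, not part of the library source): values set with
   MatSetValues() on an MPIAIJ matrix may target any global row; entries for rows owned
   by another process are stashed by the routine above and communicated during assembly.
   MatGetValues(), by contrast, only supports locally owned rows.  The indices below are
   placeholders.

     PetscInt    row = 0, col = 0;
     PetscScalar val = 1.0;

     ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);    row must be owned locally
*/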
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
  /*
     determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble.
  */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr); aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscBool      found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows.  If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory and trigger many mallocs, slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
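/*
   Note (added for exposition): MatMult_MPIAIJ and MatMultAdd_MPIAIJ above overlap
   communication with computation.  With x partitioned like the columns of A, the
   product is formed as

       y_local = A_diag * x_local + A_offdiag * x_ghost

   where x_ghost (a->lvec) holds the off-process entries of x gathered by the
   scatter a->Mvctx.  The scatter is started, the diagonal-block product is computed
   while the messages are in flight, and the off-diagonal contribution is added once
   the scatter completes.
*/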
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
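/*
   Usage sketch (added for exposition, not part of the library source): the routine
   MatIsTranspose_MPIAIJ above is ultimately reached through MatIsTranspose(); A and B
   below are placeholders for already assembled parallel matrices.

     PetscBool flg;
     ierr = MatIsTranspose(A,B,1.e-10,&flg);CHKERRQ(ierr);
     if (!flg) { ... B differs from the transpose of A by more than the tolerance ... }
*/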
/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  if (aij->A) {ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
  if (aij->B) {ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
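/*
   Note (added for exposition): the binary viewer routine below writes, in this order,

     header[4] = { MAT_FILE_CLASSID, M, N, total number of nonzeros }
     row lengths        one PetscInt per global row
     column indices     global indices, row by row, each row sorted by column
     numerical values   PetscScalar entries in the same ordering as the column indices

   Process 0 receives each other process's contribution and streams it to the file, so
   no single process ever needs to hold the entire matrix.
*/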
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject)mat)->prefix,"-mat_ascii_output_large",&flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}
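/*
   Usage sketch (added for exposition, not part of the library source): querying an
   assembled matrix for storage statistics; MAT_GLOBAL_SUM reduces the per-process
   numbers gathered by the routine above.  mat is a placeholder for the caller's matrix.

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g, allocated %g, mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/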
op,PetscBool flg) 1421 { 1422 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1423 PetscErrorCode ierr; 1424 1425 PetscFunctionBegin; 1426 switch (op) { 1427 case MAT_NEW_NONZERO_LOCATIONS: 1428 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1429 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1430 case MAT_KEEP_NONZERO_PATTERN: 1431 case MAT_NEW_NONZERO_LOCATION_ERR: 1432 case MAT_USE_INODES: 1433 case MAT_IGNORE_ZERO_ENTRIES: 1434 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1435 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1436 break; 1437 case MAT_ROW_ORIENTED: 1438 a->roworiented = flg; 1439 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1440 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1441 break; 1442 case MAT_NEW_DIAGONALS: 1443 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1444 break; 1445 case MAT_IGNORE_OFF_PROC_ENTRIES: 1446 a->donotstash = PETSC_TRUE; 1447 break; 1448 case MAT_SPD: 1449 A->spd_set = PETSC_TRUE; 1450 A->spd = flg; 1451 if (flg) { 1452 A->symmetric = PETSC_TRUE; 1453 A->structurally_symmetric = PETSC_TRUE; 1454 A->symmetric_set = PETSC_TRUE; 1455 A->structurally_symmetric_set = PETSC_TRUE; 1456 } 1457 break; 1458 case MAT_SYMMETRIC: 1459 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1460 break; 1461 case MAT_STRUCTURALLY_SYMMETRIC: 1462 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1463 break; 1464 case MAT_HERMITIAN: 1465 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1466 break; 1467 case MAT_SYMMETRY_ETERNAL: 1468 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1469 break; 1470 default: 1471 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1472 } 1473 PetscFunctionReturn(0); 1474 } 1475 1476 #undef __FUNCT__ 1477 #define __FUNCT__ "MatGetRow_MPIAIJ" 1478 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1479 { 1480 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1481 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1482 PetscErrorCode ierr; 1483 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1484 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1485 PetscInt *cmap,*idx_p; 1486 1487 PetscFunctionBegin; 1488 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1489 mat->getrowactive = PETSC_TRUE; 1490 1491 if (!mat->rowvalues && (idx || v)) { 1492 /* 1493 allocate enough space to hold information from the longest row. 
1494 */ 1495 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1496 PetscInt max = 1,tmp; 1497 for (i=0; i<matin->rmap->n; i++) { 1498 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1499 if (max < tmp) { max = tmp; } 1500 } 1501 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1502 } 1503 1504 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1505 lrow = row - rstart; 1506 1507 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1508 if (!v) {pvA = 0; pvB = 0;} 1509 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1510 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1511 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1512 nztot = nzA + nzB; 1513 1514 cmap = mat->garray; 1515 if (v || idx) { 1516 if (nztot) { 1517 /* Sort by increasing column numbers, assuming A and B already sorted */ 1518 PetscInt imark = -1; 1519 if (v) { 1520 *v = v_p = mat->rowvalues; 1521 for (i=0; i<nzB; i++) { 1522 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1523 else break; 1524 } 1525 imark = i; 1526 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1527 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1528 } 1529 if (idx) { 1530 *idx = idx_p = mat->rowindices; 1531 if (imark > -1) { 1532 for (i=0; i<imark; i++) { 1533 idx_p[i] = cmap[cworkB[i]]; 1534 } 1535 } else { 1536 for (i=0; i<nzB; i++) { 1537 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1538 else break; 1539 } 1540 imark = i; 1541 } 1542 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1543 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1544 } 1545 } else { 1546 if (idx) *idx = 0; 1547 if (v) *v = 0; 1548 } 1549 } 1550 *nz = nztot; 1551 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1552 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1553 PetscFunctionReturn(0); 1554 } 1555 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1558 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1559 { 1560 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1561 1562 PetscFunctionBegin; 1563 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1564 aij->getrowactive = PETSC_FALSE; 1565 PetscFunctionReturn(0); 1566 } 1567 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatNorm_MPIAIJ" 1570 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1571 { 1572 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1573 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1574 PetscErrorCode ierr; 1575 PetscInt i,j,cstart = mat->cmap->rstart; 1576 PetscReal sum = 0.0; 1577 MatScalar *v; 1578 1579 PetscFunctionBegin; 1580 if (aij->size == 1) { 1581 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1582 } else { 1583 if (type == NORM_FROBENIUS) { 1584 v = amat->a; 1585 for (i=0; i<amat->nz; i++) { 1586 #if defined(PETSC_USE_COMPLEX) 1587 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1588 #else 1589 sum += (*v)*(*v); v++; 1590 #endif 1591 } 1592 v = bmat->a; 1593 for (i=0; i<bmat->nz; i++) { 1594 #if defined(PETSC_USE_COMPLEX) 1595 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1596 #else 1597 sum += (*v)*(*v); v++; 1598 #endif 1599 } 1600 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1601 *norm = sqrt(*norm); 1602 } else if (type 
== NORM_1) { /* max column norm */ 1603 PetscReal *tmp,*tmp2; 1604 PetscInt *jj,*garray = aij->garray; 1605 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1606 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1607 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1608 *norm = 0.0; 1609 v = amat->a; jj = amat->j; 1610 for (j=0; j<amat->nz; j++) { 1611 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1612 } 1613 v = bmat->a; jj = bmat->j; 1614 for (j=0; j<bmat->nz; j++) { 1615 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1616 } 1617 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1618 for (j=0; j<mat->cmap->N; j++) { 1619 if (tmp2[j] > *norm) *norm = tmp2[j]; 1620 } 1621 ierr = PetscFree(tmp);CHKERRQ(ierr); 1622 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1623 } else if (type == NORM_INFINITY) { /* max row norm */ 1624 PetscReal ntemp = 0.0; 1625 for (j=0; j<aij->A->rmap->n; j++) { 1626 v = amat->a + amat->i[j]; 1627 sum = 0.0; 1628 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1629 sum += PetscAbsScalar(*v); v++; 1630 } 1631 v = bmat->a + bmat->i[j]; 1632 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1633 sum += PetscAbsScalar(*v); v++; 1634 } 1635 if (sum > ntemp) ntemp = sum; 1636 } 1637 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1638 } else { 1639 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1640 } 1641 } 1642 PetscFunctionReturn(0); 1643 } 1644 1645 #undef __FUNCT__ 1646 #define __FUNCT__ "MatTranspose_MPIAIJ" 1647 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1648 { 1649 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1650 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1651 PetscErrorCode ierr; 1652 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1653 PetscInt cstart=A->cmap->rstart,ncol; 1654 Mat B; 1655 MatScalar *array; 1656 1657 PetscFunctionBegin; 1658 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1659 1660 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1661 ai = Aloc->i; aj = Aloc->j; 1662 bi = Bloc->i; bj = Bloc->j; 1663 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1664 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1665 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1666 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1667 for (i=0; i<ai[ma]; i++){ 1668 d_nnz[aj[i]] ++; 1669 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1670 } 1671 1672 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1673 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1674 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1675 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1676 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1677 } else { 1678 B = *matout; 1679 } 1680 1681 /* copy over the A part */ 1682 array = Aloc->a; 1683 row = A->rmap->rstart; 1684 for (i=0; i<ma; i++) { 1685 ncol = ai[i+1]-ai[i]; 1686 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1687 row++; array += ncol; aj += ncol; 1688 } 1689 aj = Aloc->j; 1690 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1691 1692 /* copy over the B part */ 1693 ierr = 
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1694 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1695 array = Bloc->a; 1696 row = A->rmap->rstart; 1697 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1698 cols_tmp = cols; 1699 for (i=0; i<mb; i++) { 1700 ncol = bi[i+1]-bi[i]; 1701 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1702 row++; array += ncol; cols_tmp += ncol; 1703 } 1704 ierr = PetscFree(cols);CHKERRQ(ierr); 1705 1706 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1707 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1708 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1709 *matout = B; 1710 } else { 1711 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1712 } 1713 PetscFunctionReturn(0); 1714 } 1715 1716 #undef __FUNCT__ 1717 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1718 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1719 { 1720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1721 Mat a = aij->A,b = aij->B; 1722 PetscErrorCode ierr; 1723 PetscInt s1,s2,s3; 1724 1725 PetscFunctionBegin; 1726 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1727 if (rr) { 1728 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1729 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1730 /* Overlap communication with computation. */ 1731 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1732 } 1733 if (ll) { 1734 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1735 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1736 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1737 } 1738 /* scale the diagonal block */ 1739 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1740 1741 if (rr) { 1742 /* Do a scatter end and then right scale the off-diagonal block */ 1743 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1744 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1745 } 1746 1747 PetscFunctionReturn(0); 1748 } 1749 1750 #undef __FUNCT__ 1751 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1752 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1753 { 1754 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1755 PetscErrorCode ierr; 1756 1757 PetscFunctionBegin; 1758 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1759 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1760 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1761 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1762 PetscFunctionReturn(0); 1763 } 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1766 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1767 { 1768 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1769 PetscErrorCode ierr; 1770 1771 PetscFunctionBegin; 1772 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1773 PetscFunctionReturn(0); 1774 } 1775 1776 #undef __FUNCT__ 1777 #define __FUNCT__ "MatEqual_MPIAIJ" 1778 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 1779 { 1780 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1781 Mat a,b,c,d; 1782 PetscBool flg; 1783 PetscErrorCode ierr; 1784 1785 PetscFunctionBegin; 1786 a = matA->A; b = matA->B; 1787 c = matB->A; d = matB->B; 1788 1789 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1790 if (flg) { 1791 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1792 } 1793 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1794 
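  /* Both the diagonal (a,c) and off-diagonal (b,d) blocks must agree on every
     process; the MPI_LAND reduction above combines the per-process results so
     that all ranks return the same flag.  A minimal usage sketch from the
     caller's side (X and Y are hypothetical assembled matrices with the same
     parallel layout):

       PetscBool eq;
       ierr = MatEqual(X,Y,&eq);CHKERRQ(ierr);

     eq is PETSC_TRUE only if every process found its local blocks identical. */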
PetscFunctionReturn(0); 1795 } 1796 1797 #undef __FUNCT__ 1798 #define __FUNCT__ "MatCopy_MPIAIJ" 1799 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1800 { 1801 PetscErrorCode ierr; 1802 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1803 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1804 1805 PetscFunctionBegin; 1806 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1807 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1808 /* because of the column compression in the off-processor part of the matrix a->B, 1809 the number of columns in a->B and b->B may be different, hence we cannot call 1810 the MatCopy() directly on the two parts. If need be, we can provide a more 1811 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1812 then copying the submatrices */ 1813 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1814 } else { 1815 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1816 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1817 } 1818 PetscFunctionReturn(0); 1819 } 1820 1821 #undef __FUNCT__ 1822 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1823 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1824 { 1825 PetscErrorCode ierr; 1826 1827 PetscFunctionBegin; 1828 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1829 PetscFunctionReturn(0); 1830 } 1831 1832 #undef __FUNCT__ 1833 #define __FUNCT__ "MatAXPY_MPIAIJ" 1834 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1835 { 1836 PetscErrorCode ierr; 1837 PetscInt i; 1838 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1839 PetscBLASInt bnz,one=1; 1840 Mat_SeqAIJ *x,*y; 1841 1842 PetscFunctionBegin; 1843 if (str == SAME_NONZERO_PATTERN) { 1844 PetscScalar alpha = a; 1845 x = (Mat_SeqAIJ *)xx->A->data; 1846 y = (Mat_SeqAIJ *)yy->A->data; 1847 bnz = PetscBLASIntCast(x->nz); 1848 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1849 x = (Mat_SeqAIJ *)xx->B->data; 1850 y = (Mat_SeqAIJ *)yy->B->data; 1851 bnz = PetscBLASIntCast(x->nz); 1852 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1853 } else if (str == SUBSET_NONZERO_PATTERN) { 1854 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1855 1856 x = (Mat_SeqAIJ *)xx->B->data; 1857 y = (Mat_SeqAIJ *)yy->B->data; 1858 if (y->xtoy && y->XtoY != xx->B) { 1859 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1860 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1861 } 1862 if (!y->xtoy) { /* get xtoy */ 1863 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1864 y->XtoY = xx->B; 1865 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1866 } 1867 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1868 } else { 1869 Mat B; 1870 PetscInt *nnz_d,*nnz_o; 1871 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 1872 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 1873 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 1874 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1875 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 1876 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1877 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 1878 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 1879 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1880 ierr = MatHeaderReplace(Y,B); 1881 ierr = 
PetscFree(nnz_d);CHKERRQ(ierr); 1882 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1883 } 1884 PetscFunctionReturn(0); 1885 } 1886 1887 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1888 1889 #undef __FUNCT__ 1890 #define __FUNCT__ "MatConjugate_MPIAIJ" 1891 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1892 { 1893 #if defined(PETSC_USE_COMPLEX) 1894 PetscErrorCode ierr; 1895 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1896 1897 PetscFunctionBegin; 1898 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1899 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1900 #else 1901 PetscFunctionBegin; 1902 #endif 1903 PetscFunctionReturn(0); 1904 } 1905 1906 #undef __FUNCT__ 1907 #define __FUNCT__ "MatRealPart_MPIAIJ" 1908 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1909 { 1910 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1911 PetscErrorCode ierr; 1912 1913 PetscFunctionBegin; 1914 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1915 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1916 PetscFunctionReturn(0); 1917 } 1918 1919 #undef __FUNCT__ 1920 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1921 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1922 { 1923 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1924 PetscErrorCode ierr; 1925 1926 PetscFunctionBegin; 1927 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1928 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1929 PetscFunctionReturn(0); 1930 } 1931 1932 #ifdef PETSC_HAVE_PBGL 1933 1934 #include <boost/parallel/mpi/bsp_process_group.hpp> 1935 #include <boost/graph/distributed/ilu_default_graph.hpp> 1936 #include <boost/graph/distributed/ilu_0_block.hpp> 1937 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1938 #include <boost/graph/distributed/petsc/interface.hpp> 1939 #include <boost/multi_array.hpp> 1940 #include <boost/parallel/distributed_property_map->hpp> 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1944 /* 1945 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1946 */ 1947 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1948 { 1949 namespace petsc = boost::distributed::petsc; 1950 1951 namespace graph_dist = boost::graph::distributed; 1952 using boost::graph::distributed::ilu_default::process_group_type; 1953 using boost::graph::ilu_permuted; 1954 1955 PetscBool row_identity, col_identity; 1956 PetscContainer c; 1957 PetscInt m, n, M, N; 1958 PetscErrorCode ierr; 1959 1960 PetscFunctionBegin; 1961 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1962 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1963 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1964 if (!row_identity || !col_identity) { 1965 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1966 } 1967 1968 process_group_type pg; 1969 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1970 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1971 lgraph_type& level_graph = *lgraph_p; 1972 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1973 1974 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1975 ilu_permuted(level_graph); 1976 1977 /* put together the new matrix */ 1978 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1979 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1980 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 
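  /* The boost level graph built above cannot live inside the Mat data structure,
     so it is attached to the factor matrix through a PetscContainer (see the
     PetscContainerCreate()/PetscObjectCompose() calls below).  MatSolve_MPIAIJ()
     recovers it with the mirror-image query; a minimal sketch of that retrieval,
     using the same "graph" key and types as this file:

       PetscContainer c;
       lgraph_type    *lgraph_p;
       ierr = PetscObjectQuery((PetscObject)fact,"graph",(PetscObject *)&c);CHKERRQ(ierr);
       ierr = PetscContainerGetPointer(c,(void **)&lgraph_p);CHKERRQ(ierr);
  */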
1981 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1982 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1983 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1984 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1985 1986 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1987 ierr = PetscContainerSetPointer(c, lgraph_p); 1988 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1989 PetscFunctionReturn(0); 1990 } 1991 1992 #undef __FUNCT__ 1993 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1994 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1995 { 1996 PetscFunctionBegin; 1997 PetscFunctionReturn(0); 1998 } 1999 2000 #undef __FUNCT__ 2001 #define __FUNCT__ "MatSolve_MPIAIJ" 2002 /* 2003 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2004 */ 2005 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2006 { 2007 namespace graph_dist = boost::graph::distributed; 2008 2009 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2010 lgraph_type* lgraph_p; 2011 PetscContainer c; 2012 PetscErrorCode ierr; 2013 2014 PetscFunctionBegin; 2015 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2016 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2017 ierr = VecCopy(b, x);CHKERRQ(ierr); 2018 2019 PetscScalar* array_x; 2020 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2021 PetscInt sx; 2022 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2023 2024 PetscScalar* array_b; 2025 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2026 PetscInt sb; 2027 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2028 2029 lgraph_type& level_graph = *lgraph_p; 2030 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2031 2032 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2033 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2034 ref_x(array_x, boost::extents[num_vertices(graph)]); 2035 2036 typedef boost::iterator_property_map<array_ref_type::iterator, 2037 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2038 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2039 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2040 2041 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2042 2043 PetscFunctionReturn(0); 2044 } 2045 #endif 2046 2047 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2048 PetscInt nzlocal,nsends,nrecvs; 2049 PetscMPIInt *send_rank,*recv_rank; 2050 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2051 PetscScalar *sbuf_a,**rbuf_a; 2052 PetscErrorCode (*MatDestroy)(Mat); 2053 } Mat_Redundant; 2054 2055 #undef __FUNCT__ 2056 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2057 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2058 { 2059 PetscErrorCode ierr; 2060 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2061 PetscInt i; 2062 2063 PetscFunctionBegin; 2064 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2065 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2066 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2067 for (i=0; i<redund->nrecvs; i++){ 2068 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2069 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2070 } 2071 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2072 ierr = PetscFree(redund);CHKERRQ(ierr); 2073 
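  /* Everything released above was allocated by MatGetRedundantMatrix_MPIAIJ()
     below and kept alive (attached to the redundant matrix through the
     "Mat_Redundant" container) so that a later call with MAT_REUSE_MATRIX can
     skip the setup and only re-communicate numerical values.  A rough sketch of
     the calling sequence this supports (subcomm, nsubcomm and mlocal_sub are
     assumed to have been set up by the caller, e.g. by PCREDUNDANT):

       Mat red;
       ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&red);CHKERRQ(ierr);
       ...
       ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&red);CHKERRQ(ierr);
       ierr = MatDestroy(red);CHKERRQ(ierr);
  */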
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatRedundant"
PetscErrorCode MatDestroy_MatRedundant(Mat A)
{
  PetscErrorCode ierr;
  PetscContainer container;
  Mat_Redundant *redund=PETSC_NULL;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
  }
  A->ops->destroy = redund->MatDestroy;
  ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt rank,size;
  MPI_Comm comm=((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt *rowrange=mat->rmap->range;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar *sbuf_a;
  PetscInt nzlocal=a->nz+b->nz;
  PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
  PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar *aworkA,*aworkB;
  PetscScalar *vals;
  PetscMPIInt tag1,tag2,tag3,imdex;
  MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
              *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status recv_status,*send_status;
  PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt **rbuf_j=PETSC_NULL;
  PetscScalar **rbuf_a=PETSC_NULL;
  Mat_Redundant *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2144 2145 nsends = redund->nsends; 2146 nrecvs = redund->nrecvs; 2147 send_rank = redund->send_rank; 2148 recv_rank = redund->recv_rank; 2149 sbuf_nz = redund->sbuf_nz; 2150 rbuf_nz = redund->rbuf_nz; 2151 sbuf_j = redund->sbuf_j; 2152 sbuf_a = redund->sbuf_a; 2153 rbuf_j = redund->rbuf_j; 2154 rbuf_a = redund->rbuf_a; 2155 } 2156 2157 if (reuse == MAT_INITIAL_MATRIX){ 2158 PetscMPIInt subrank,subsize; 2159 PetscInt nleftover,np_subcomm; 2160 /* get the destination processors' id send_rank, nsends and nrecvs */ 2161 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2162 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2163 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2164 np_subcomm = size/nsubcomm; 2165 nleftover = size - nsubcomm*np_subcomm; 2166 nsends = 0; nrecvs = 0; 2167 for (i=0; i<size; i++){ /* i=rank*/ 2168 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2169 send_rank[nsends] = i; nsends++; 2170 recv_rank[nrecvs++] = i; 2171 } 2172 } 2173 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2174 i = size-nleftover-1; 2175 j = 0; 2176 while (j < nsubcomm - nleftover){ 2177 send_rank[nsends++] = i; 2178 i--; j++; 2179 } 2180 } 2181 2182 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2183 for (i=0; i<nleftover; i++){ 2184 recv_rank[nrecvs++] = size-nleftover+i; 2185 } 2186 } 2187 2188 /* allocate sbuf_j, sbuf_a */ 2189 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2190 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2191 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2192 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2193 2194 /* copy mat's local entries into the buffers */ 2195 if (reuse == MAT_INITIAL_MATRIX){ 2196 rownz_max = 0; 2197 rptr = sbuf_j; 2198 cols = sbuf_j + rend-rstart + 1; 2199 vals = sbuf_a; 2200 rptr[0] = 0; 2201 for (i=0; i<rend-rstart; i++){ 2202 row = i + rstart; 2203 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2204 ncols = nzA + nzB; 2205 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2206 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2207 /* load the column indices for this row into cols */ 2208 lwrite = 0; 2209 for (l=0; l<nzB; l++) { 2210 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2211 vals[lwrite] = aworkB[l]; 2212 cols[lwrite++] = ctmp; 2213 } 2214 } 2215 for (l=0; l<nzA; l++){ 2216 vals[lwrite] = aworkA[l]; 2217 cols[lwrite++] = cstart + cworkA[l]; 2218 } 2219 for (l=0; l<nzB; l++) { 2220 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2221 vals[lwrite] = aworkB[l]; 2222 cols[lwrite++] = ctmp; 2223 } 2224 } 2225 vals += ncols; 2226 cols += ncols; 2227 rptr[i+1] = rptr[i] + ncols; 2228 if (rownz_max < ncols) rownz_max = ncols; 2229 } 2230 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2231 } else { /* only copy matrix values into sbuf_a */ 2232 rptr = sbuf_j; 2233 vals = sbuf_a; 2234 rptr[0] = 0; 2235 for (i=0; i<rend-rstart; i++){ 2236 row = i + rstart; 2237 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2238 ncols = nzA + nzB; 2239 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2240 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2241 lwrite = 0; 2242 for (l=0; l<nzB; l++) { 2243 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2244 } 2245 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2246 for 
(l=0; l<nzB; l++) { 2247 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2248 } 2249 vals += ncols; 2250 rptr[i+1] = rptr[i] + ncols; 2251 } 2252 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2253 2254 /* send nzlocal to others, and recv other's nzlocal */ 2255 /*--------------------------------------------------*/ 2256 if (reuse == MAT_INITIAL_MATRIX){ 2257 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2258 s_waits2 = s_waits3 + nsends; 2259 s_waits1 = s_waits2 + nsends; 2260 r_waits1 = s_waits1 + nsends; 2261 r_waits2 = r_waits1 + nrecvs; 2262 r_waits3 = r_waits2 + nrecvs; 2263 } else { 2264 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2265 r_waits3 = s_waits3 + nsends; 2266 } 2267 2268 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2269 if (reuse == MAT_INITIAL_MATRIX){ 2270 /* get new tags to keep the communication clean */ 2271 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2272 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2273 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2274 2275 /* post receives of other's nzlocal */ 2276 for (i=0; i<nrecvs; i++){ 2277 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2278 } 2279 /* send nzlocal to others */ 2280 for (i=0; i<nsends; i++){ 2281 sbuf_nz[i] = nzlocal; 2282 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2283 } 2284 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2285 count = nrecvs; 2286 while (count) { 2287 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2288 recv_rank[imdex] = recv_status.MPI_SOURCE; 2289 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2290 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2291 2292 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2293 rbuf_nz[imdex] += i + 2; 2294 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2295 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2296 count--; 2297 } 2298 /* wait on sends of nzlocal */ 2299 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2300 /* send mat->i,j to others, and recv from other's */ 2301 /*------------------------------------------------*/ 2302 for (i=0; i<nsends; i++){ 2303 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2304 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2305 } 2306 /* wait on receives of mat->i,j */ 2307 /*------------------------------*/ 2308 count = nrecvs; 2309 while (count) { 2310 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2311 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2312 count--; 2313 } 2314 /* wait on sends of mat->i,j */ 2315 /*---------------------------*/ 2316 if (nsends) { 2317 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2318 } 2319 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2320 2321 /* post receives, send and receive mat->a */ 2322 
/*----------------------------------------*/ 2323 for (imdex=0; imdex<nrecvs; imdex++) { 2324 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2325 } 2326 for (i=0; i<nsends; i++){ 2327 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2328 } 2329 count = nrecvs; 2330 while (count) { 2331 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2332 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2333 count--; 2334 } 2335 if (nsends) { 2336 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2337 } 2338 2339 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2340 2341 /* create redundant matrix */ 2342 /*-------------------------*/ 2343 if (reuse == MAT_INITIAL_MATRIX){ 2344 /* compute rownz_max for preallocation */ 2345 for (imdex=0; imdex<nrecvs; imdex++){ 2346 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2347 rptr = rbuf_j[imdex]; 2348 for (i=0; i<j; i++){ 2349 ncols = rptr[i+1] - rptr[i]; 2350 if (rownz_max < ncols) rownz_max = ncols; 2351 } 2352 } 2353 2354 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2355 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2356 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2357 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2358 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2359 } else { 2360 C = *matredundant; 2361 } 2362 2363 /* insert local matrix entries */ 2364 rptr = sbuf_j; 2365 cols = sbuf_j + rend-rstart + 1; 2366 vals = sbuf_a; 2367 for (i=0; i<rend-rstart; i++){ 2368 row = i + rstart; 2369 ncols = rptr[i+1] - rptr[i]; 2370 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2371 vals += ncols; 2372 cols += ncols; 2373 } 2374 /* insert received matrix entries */ 2375 for (imdex=0; imdex<nrecvs; imdex++){ 2376 rstart = rowrange[recv_rank[imdex]]; 2377 rend = rowrange[recv_rank[imdex]+1]; 2378 rptr = rbuf_j[imdex]; 2379 cols = rbuf_j[imdex] + rend-rstart + 1; 2380 vals = rbuf_a[imdex]; 2381 for (i=0; i<rend-rstart; i++){ 2382 row = i + rstart; 2383 ncols = rptr[i+1] - rptr[i]; 2384 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2385 vals += ncols; 2386 cols += ncols; 2387 } 2388 } 2389 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2390 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2391 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2392 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2393 if (reuse == MAT_INITIAL_MATRIX){ 2394 PetscContainer container; 2395 *matredundant = C; 2396 /* create a supporting struct and attach it to C for reuse */ 2397 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2398 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2399 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2400 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2401 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2402 2403 redund->nzlocal = nzlocal; 2404 redund->nsends = nsends; 2405 redund->nrecvs = nrecvs; 2406 redund->send_rank = send_rank; 2407 redund->recv_rank = recv_rank; 
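    /* the send/receive buffers themselves are cached below as well, so that a later
       MAT_REUSE_MATRIX call can skip the index exchange and only ship new values */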
2408 redund->sbuf_nz = sbuf_nz; 2409 redund->rbuf_nz = rbuf_nz; 2410 redund->sbuf_j = sbuf_j; 2411 redund->sbuf_a = sbuf_a; 2412 redund->rbuf_j = rbuf_j; 2413 redund->rbuf_a = rbuf_a; 2414 2415 redund->MatDestroy = C->ops->destroy; 2416 C->ops->destroy = MatDestroy_MatRedundant; 2417 } 2418 PetscFunctionReturn(0); 2419 } 2420 2421 #undef __FUNCT__ 2422 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2423 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2424 { 2425 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2426 PetscErrorCode ierr; 2427 PetscInt i,*idxb = 0; 2428 PetscScalar *va,*vb; 2429 Vec vtmp; 2430 2431 PetscFunctionBegin; 2432 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2433 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2434 if (idx) { 2435 for (i=0; i<A->rmap->n; i++) { 2436 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2437 } 2438 } 2439 2440 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2441 if (idx) { 2442 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2443 } 2444 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2445 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2446 2447 for (i=0; i<A->rmap->n; i++){ 2448 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2449 va[i] = vb[i]; 2450 if (idx) idx[i] = a->garray[idxb[i]]; 2451 } 2452 } 2453 2454 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2455 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2456 if (idxb) { 2457 ierr = PetscFree(idxb);CHKERRQ(ierr); 2458 } 2459 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2460 PetscFunctionReturn(0); 2461 } 2462 2463 #undef __FUNCT__ 2464 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2465 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2466 { 2467 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2468 PetscErrorCode ierr; 2469 PetscInt i,*idxb = 0; 2470 PetscScalar *va,*vb; 2471 Vec vtmp; 2472 2473 PetscFunctionBegin; 2474 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2475 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2476 if (idx) { 2477 for (i=0; i<A->cmap->n; i++) { 2478 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2479 } 2480 } 2481 2482 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2483 if (idx) { 2484 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2485 } 2486 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2487 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2488 2489 for (i=0; i<A->rmap->n; i++){ 2490 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2491 va[i] = vb[i]; 2492 if (idx) idx[i] = a->garray[idxb[i]]; 2493 } 2494 } 2495 2496 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2497 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2498 if (idxb) { 2499 ierr = PetscFree(idxb);CHKERRQ(ierr); 2500 } 2501 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2502 PetscFunctionReturn(0); 2503 } 2504 2505 #undef __FUNCT__ 2506 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2507 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2508 { 2509 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2510 PetscInt n = A->rmap->n; 2511 PetscInt cstart = A->cmap->rstart; 2512 PetscInt *cmap = mat->garray; 2513 PetscInt *diagIdx, *offdiagIdx; 2514 Vec diagV, offdiagV; 2515 PetscScalar *a, *diagA, *offdiagA; 2516 PetscInt r; 2517 PetscErrorCode ierr; 2518 2519 PetscFunctionBegin; 2520 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2521 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2522 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2523 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2524 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2525 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2526 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2527 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2528 for(r = 0; r < n; ++r) { 2529 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2530 a[r] = diagA[r]; 2531 idx[r] = cstart + diagIdx[r]; 2532 } else { 2533 a[r] = offdiagA[r]; 2534 idx[r] = cmap[offdiagIdx[r]]; 2535 } 2536 } 2537 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2538 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2539 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2540 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2541 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2542 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2543 PetscFunctionReturn(0); 2544 } 2545 2546 #undef __FUNCT__ 2547 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2548 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2549 { 2550 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2551 PetscInt n = A->rmap->n; 2552 PetscInt cstart = A->cmap->rstart; 2553 PetscInt *cmap = mat->garray; 2554 PetscInt *diagIdx, *offdiagIdx; 2555 Vec diagV, offdiagV; 2556 PetscScalar *a, *diagA, *offdiagA; 2557 PetscInt r; 2558 PetscErrorCode ierr; 2559 2560 PetscFunctionBegin; 2561 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2562 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2563 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2564 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2565 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2566 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2567 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2568 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2569 for(r = 0; r < n; ++r) { 2570 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2571 a[r] = diagA[r]; 2572 idx[r] = cstart + diagIdx[r]; 2573 } else { 2574 a[r] = offdiagA[r]; 2575 idx[r] = cmap[offdiagIdx[r]]; 2576 } 2577 } 2578 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2579 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2580 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2581 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2582 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2583 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2584 PetscFunctionReturn(0); 2585 } 2586 2587 #undef __FUNCT__ 2588 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2589 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2590 { 2591 PetscErrorCode ierr; 2592 Mat *dummy; 2593 2594 PetscFunctionBegin; 2595 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2596 *newmat = *dummy; 2597 ierr = PetscFree(dummy);CHKERRQ(ierr); 2598 PetscFunctionReturn(0); 2599 } 2600 2601 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2602 /* -------------------------------------------------------------------*/ 2603 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2604 MatGetRow_MPIAIJ, 2605 MatRestoreRow_MPIAIJ, 2606 MatMult_MPIAIJ, 2607 /* 4*/ MatMultAdd_MPIAIJ, 2608 MatMultTranspose_MPIAIJ, 2609 MatMultTransposeAdd_MPIAIJ, 2610 #ifdef PETSC_HAVE_PBGL 2611 MatSolve_MPIAIJ, 2612 #else 2613 0, 2614 #endif 2615 0, 2616 0, 2617 /*10*/ 0, 2618 0, 2619 0, 2620 
MatSOR_MPIAIJ, 2621 MatTranspose_MPIAIJ, 2622 /*15*/ MatGetInfo_MPIAIJ, 2623 MatEqual_MPIAIJ, 2624 MatGetDiagonal_MPIAIJ, 2625 MatDiagonalScale_MPIAIJ, 2626 MatNorm_MPIAIJ, 2627 /*20*/ MatAssemblyBegin_MPIAIJ, 2628 MatAssemblyEnd_MPIAIJ, 2629 MatSetOption_MPIAIJ, 2630 MatZeroEntries_MPIAIJ, 2631 /*24*/ MatZeroRows_MPIAIJ, 2632 0, 2633 #ifdef PETSC_HAVE_PBGL 2634 0, 2635 #else 2636 0, 2637 #endif 2638 0, 2639 0, 2640 /*29*/ MatSetUpPreallocation_MPIAIJ, 2641 #ifdef PETSC_HAVE_PBGL 2642 0, 2643 #else 2644 0, 2645 #endif 2646 0, 2647 0, 2648 0, 2649 /*34*/ MatDuplicate_MPIAIJ, 2650 0, 2651 0, 2652 0, 2653 0, 2654 /*39*/ MatAXPY_MPIAIJ, 2655 MatGetSubMatrices_MPIAIJ, 2656 MatIncreaseOverlap_MPIAIJ, 2657 MatGetValues_MPIAIJ, 2658 MatCopy_MPIAIJ, 2659 /*44*/ MatGetRowMax_MPIAIJ, 2660 MatScale_MPIAIJ, 2661 0, 2662 0, 2663 0, 2664 /*49*/ MatSetBlockSize_MPIAIJ, 2665 0, 2666 0, 2667 0, 2668 0, 2669 /*54*/ MatFDColoringCreate_MPIAIJ, 2670 0, 2671 MatSetUnfactored_MPIAIJ, 2672 MatPermute_MPIAIJ, 2673 0, 2674 /*59*/ MatGetSubMatrix_MPIAIJ, 2675 MatDestroy_MPIAIJ, 2676 MatView_MPIAIJ, 2677 0, 2678 0, 2679 /*64*/ 0, 2680 0, 2681 0, 2682 0, 2683 0, 2684 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2685 MatGetRowMinAbs_MPIAIJ, 2686 0, 2687 MatSetColoring_MPIAIJ, 2688 #if defined(PETSC_HAVE_ADIC) 2689 MatSetValuesAdic_MPIAIJ, 2690 #else 2691 0, 2692 #endif 2693 MatSetValuesAdifor_MPIAIJ, 2694 /*75*/ MatFDColoringApply_AIJ, 2695 0, 2696 0, 2697 0, 2698 0, 2699 /*80*/ 0, 2700 0, 2701 0, 2702 /*83*/ MatLoad_MPIAIJ, 2703 0, 2704 0, 2705 0, 2706 0, 2707 0, 2708 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2709 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2710 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2711 MatPtAP_Basic, 2712 MatPtAPSymbolic_MPIAIJ, 2713 /*94*/ MatPtAPNumeric_MPIAIJ, 2714 0, 2715 0, 2716 0, 2717 0, 2718 /*99*/ 0, 2719 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2720 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2721 MatConjugate_MPIAIJ, 2722 0, 2723 /*104*/MatSetValuesRow_MPIAIJ, 2724 MatRealPart_MPIAIJ, 2725 MatImaginaryPart_MPIAIJ, 2726 0, 2727 0, 2728 /*109*/0, 2729 MatGetRedundantMatrix_MPIAIJ, 2730 MatGetRowMin_MPIAIJ, 2731 0, 2732 0, 2733 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2734 0, 2735 0, 2736 0, 2737 0, 2738 /*119*/0, 2739 0, 2740 0, 2741 0, 2742 MatGetMultiProcBlock_MPIAIJ 2743 }; 2744 2745 /* ----------------------------------------------------------------------------------------*/ 2746 2747 EXTERN_C_BEGIN 2748 #undef __FUNCT__ 2749 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2750 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2751 { 2752 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2753 PetscErrorCode ierr; 2754 2755 PetscFunctionBegin; 2756 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2757 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2758 PetscFunctionReturn(0); 2759 } 2760 EXTERN_C_END 2761 2762 EXTERN_C_BEGIN 2763 #undef __FUNCT__ 2764 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2765 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2766 { 2767 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2768 PetscErrorCode ierr; 2769 2770 PetscFunctionBegin; 2771 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2772 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2773 PetscFunctionReturn(0); 2774 } 2775 EXTERN_C_END 2776 2777 EXTERN_C_BEGIN 2778 #undef __FUNCT__ 2779 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2780 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2781 { 2782 Mat_MPIAIJ *b; 2783 PetscErrorCode ierr; 2784 PetscInt 
i; 2785 2786 PetscFunctionBegin; 2787 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2788 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2789 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2790 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2791 2792 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2793 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2794 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2795 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2796 if (d_nnz) { 2797 for (i=0; i<B->rmap->n; i++) { 2798 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2799 } 2800 } 2801 if (o_nnz) { 2802 for (i=0; i<B->rmap->n; i++) { 2803 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2804 } 2805 } 2806 b = (Mat_MPIAIJ*)B->data; 2807 2808 if (!B->preallocated) { 2809 /* Explicitly create 2 MATSEQAIJ matrices. */ 2810 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2811 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2812 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2813 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2814 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2815 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2816 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2817 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2818 } 2819 2820 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2821 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2822 B->preallocated = PETSC_TRUE; 2823 PetscFunctionReturn(0); 2824 } 2825 EXTERN_C_END 2826 2827 #undef __FUNCT__ 2828 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2829 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2830 { 2831 Mat mat; 2832 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2833 PetscErrorCode ierr; 2834 2835 PetscFunctionBegin; 2836 *newmat = 0; 2837 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2838 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2839 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2840 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2841 a = (Mat_MPIAIJ*)mat->data; 2842 2843 mat->factortype = matin->factortype; 2844 mat->rmap->bs = matin->rmap->bs; 2845 mat->assembled = PETSC_TRUE; 2846 mat->insertmode = NOT_SET_VALUES; 2847 mat->preallocated = PETSC_TRUE; 2848 2849 a->size = oldmat->size; 2850 a->rank = oldmat->rank; 2851 a->donotstash = oldmat->donotstash; 2852 a->roworiented = oldmat->roworiented; 2853 a->rowindices = 0; 2854 a->rowvalues = 0; 2855 a->getrowactive = PETSC_FALSE; 2856 2857 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2858 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2859 2860 if (oldmat->colmap) { 2861 #if defined (PETSC_USE_CTABLE) 2862 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2863 #else 2864 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2865 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2866 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/*
    Allows sending/receiving messages larger than 2 gigabytes in a single call
*/
static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
{
  int ierr;
  static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
  PetscInt i,numchunks;
  PetscMPIInt icnt;

  numchunks = cnt/CHUNKSIZE + 1;
  for (i=0; i<numchunks; i++) {
    icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
    ierr = MPI_Send(mess,icnt,type,to,tag,comm);
    if (type == MPIU_INT) {
      mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
    } else if (type == MPIU_SCALAR) {
      mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
    } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
  }
  return 0;
}
static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
{
  int ierr;
  static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
  MPI_Status status;
  PetscInt i,numchunks;
  PetscMPIInt icnt;

  numchunks = cnt/CHUNKSIZE + 1;
  for (i=0; i<numchunks; i++) {
    icnt = PetscMPIIntCast((i < numchunks-1) ? 
CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 2923 ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status); 2924 if (type == MPIU_INT) { 2925 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 2926 } else if (type == MPIU_SCALAR) { 2927 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 2928 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 2929 } 2930 return 0; 2931 } 2932 2933 #undef __FUNCT__ 2934 #define __FUNCT__ "MatLoad_MPIAIJ" 2935 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2936 { 2937 PetscScalar *vals,*svals; 2938 MPI_Comm comm = ((PetscObject)viewer)->comm; 2939 PetscErrorCode ierr; 2940 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2941 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 2942 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2943 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2944 PetscInt cend,cstart,n,*rowners,sizesset=1; 2945 int fd; 2946 2947 PetscFunctionBegin; 2948 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2949 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2950 if (!rank) { 2951 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2952 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2953 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2954 } 2955 2956 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 2957 2958 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2959 M = header[1]; N = header[2]; 2960 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2961 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 2962 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 2963 2964 /* If global sizes are set, check if they are consistent with that given in the file */ 2965 if (sizesset) { 2966 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 2967 } 2968 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2969 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2970 2971 /* determine ownership of all rows */ 2972 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 2973 else m = newMat->rmap->n; /* Set by user */ 2974 2975 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2976 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2977 2978 /* First process needs enough room for process with most rows */ 2979 if (!rank) { 2980 mmax = rowners[1]; 2981 for (i=2; i<size; i++) { 2982 mmax = PetscMax(mmax,rowners[i]); 2983 } 2984 } else mmax = m; 2985 2986 rowners[0] = 0; 2987 for (i=2; i<=size; i++) { 2988 rowners[i] += rowners[i-1]; 2989 } 2990 rstart = rowners[rank]; 2991 rend = rowners[rank+1]; 2992 2993 /* distribute row lengths to all processors */ 2994 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2995 if (!rank) { 2996 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2997 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2998 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2999 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3000 for (j=0; 
j<m; j++) { 3001 procsnz[0] += ourlens[j]; 3002 } 3003 for (i=1; i<size; i++) { 3004 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3005 /* calculate the number of nonzeros on each processor */ 3006 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3007 procsnz[i] += rowlengths[j]; 3008 } 3009 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3010 } 3011 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3012 } else { 3013 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3014 } 3015 3016 if (!rank) { 3017 /* determine max buffer needed and allocate it */ 3018 maxnz = 0; 3019 for (i=0; i<size; i++) { 3020 maxnz = PetscMax(maxnz,procsnz[i]); 3021 } 3022 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3023 3024 /* read in my part of the matrix column indices */ 3025 nz = procsnz[0]; 3026 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3027 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3028 3029 /* read in every one elses and ship off */ 3030 for (i=1; i<size; i++) { 3031 nz = procsnz[i]; 3032 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3033 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3034 } 3035 ierr = PetscFree(cols);CHKERRQ(ierr); 3036 } else { 3037 /* determine buffer space needed for message */ 3038 nz = 0; 3039 for (i=0; i<m; i++) { 3040 nz += ourlens[i]; 3041 } 3042 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3043 3044 /* receive message of column indices*/ 3045 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3046 } 3047 3048 /* determine column ownership if matrix is not square */ 3049 if (N != M) { 3050 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3051 else n = newMat->cmap->n; 3052 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3053 cstart = cend - n; 3054 } else { 3055 cstart = rstart; 3056 cend = rend; 3057 n = cend - cstart; 3058 } 3059 3060 /* loop over local rows, determining number of off diagonal entries */ 3061 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3062 jj = 0; 3063 for (i=0; i<m; i++) { 3064 for (j=0; j<ourlens[i]; j++) { 3065 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3066 jj++; 3067 } 3068 } 3069 3070 for (i=0; i<m; i++) { 3071 ourlens[i] -= offlens[i]; 3072 } 3073 if (!sizesset) { 3074 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3075 } 3076 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3077 3078 for (i=0; i<m; i++) { 3079 ourlens[i] += offlens[i]; 3080 } 3081 3082 if (!rank) { 3083 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3084 3085 /* read in my part of the matrix numerical values */ 3086 nz = procsnz[0]; 3087 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3088 3089 /* insert into matrix */ 3090 jj = rstart; 3091 smycols = mycols; 3092 svals = vals; 3093 for (i=0; i<m; i++) { 3094 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3095 smycols += ourlens[i]; 3096 svals += ourlens[i]; 3097 jj++; 3098 } 3099 3100 /* read in other processors and ship out */ 3101 for (i=1; i<size; i++) { 3102 nz = procsnz[i]; 3103 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3104 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3105 } 3106 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3107 } else { 3108 /* receive numeric values */ 3109 ierr 
= PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3110 3111 /* receive message of values*/ 3112 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3113 3114 /* insert into matrix */ 3115 jj = rstart; 3116 smycols = mycols; 3117 svals = vals; 3118 for (i=0; i<m; i++) { 3119 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3120 smycols += ourlens[i]; 3121 svals += ourlens[i]; 3122 jj++; 3123 } 3124 } 3125 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3126 ierr = PetscFree(vals);CHKERRQ(ierr); 3127 ierr = PetscFree(mycols);CHKERRQ(ierr); 3128 ierr = PetscFree(rowners);CHKERRQ(ierr); 3129 3130 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3131 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3132 PetscFunctionReturn(0); 3133 } 3134 3135 #undef __FUNCT__ 3136 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3137 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3138 { 3139 PetscErrorCode ierr; 3140 IS iscol_local; 3141 PetscInt csize; 3142 3143 PetscFunctionBegin; 3144 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3145 if (call == MAT_REUSE_MATRIX) { 3146 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3147 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3148 } else { 3149 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3150 } 3151 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3152 if (call == MAT_INITIAL_MATRIX) { 3153 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3154 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3155 } 3156 PetscFunctionReturn(0); 3157 } 3158 3159 #undef __FUNCT__ 3160 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3161 /* 3162 Not great since it makes two copies of the submatrix, first an SeqAIJ 3163 in local and then by concatenating the local matrices the end result. 3164 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3165 3166 Note: This requires a sequential iscol with all indices. 
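
      As a rough usage sketch of the public entry point MatGetSubMatrix(), which ends up
      in this code for MPIAIJ matrices (illustrative only; the communicator, local sizes,
      and the use of ISCreateStride() are assumptions, not taken from this file):

         IS  isrow,iscol;
         Mat sub;
         ierr = ISCreateStride(comm,nlocal,rstart,1,&isrow);CHKERRQ(ierr);
         ierr = ISCreateStride(comm,nlocal,rstart,1,&iscol);CHKERRQ(ierr);
         ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
         ...
         ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);

      The MAT_REUSE_MATRIX call only works on a submatrix previously obtained with
      MAT_INITIAL_MATRIX, because the gathered column IS is stashed on the submatrix
      via the "ISAllGather" composition in MatGetSubMatrix_MPIAIJ() above.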
3167 */ 3168 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3169 { 3170 PetscErrorCode ierr; 3171 PetscMPIInt rank,size; 3172 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3173 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3174 Mat *local,M,Mreuse; 3175 MatScalar *vwork,*aa; 3176 MPI_Comm comm = ((PetscObject)mat)->comm; 3177 Mat_SeqAIJ *aij; 3178 3179 3180 PetscFunctionBegin; 3181 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3182 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3183 3184 if (call == MAT_REUSE_MATRIX) { 3185 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3186 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3187 local = &Mreuse; 3188 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3189 } else { 3190 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3191 Mreuse = *local; 3192 ierr = PetscFree(local);CHKERRQ(ierr); 3193 } 3194 3195 /* 3196 m - number of local rows 3197 n - number of columns (same on all processors) 3198 rstart - first row in new global matrix generated 3199 */ 3200 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3201 if (call == MAT_INITIAL_MATRIX) { 3202 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3203 ii = aij->i; 3204 jj = aij->j; 3205 3206 /* 3207 Determine the number of non-zeros in the diagonal and off-diagonal 3208 portions of the matrix in order to do correct preallocation 3209 */ 3210 3211 /* first get start and end of "diagonal" columns */ 3212 if (csize == PETSC_DECIDE) { 3213 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3214 if (mglobal == n) { /* square matrix */ 3215 nlocal = m; 3216 } else { 3217 nlocal = n/size + ((n % size) > rank); 3218 } 3219 } else { 3220 nlocal = csize; 3221 } 3222 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3223 rstart = rend - nlocal; 3224 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3225 3226 /* next, compute all the lengths */ 3227 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3228 olens = dlens + m; 3229 for (i=0; i<m; i++) { 3230 jend = ii[i+1] - ii[i]; 3231 olen = 0; 3232 dlen = 0; 3233 for (j=0; j<jend; j++) { 3234 if (*jj < rstart || *jj >= rend) olen++; 3235 else dlen++; 3236 jj++; 3237 } 3238 olens[i] = olen; 3239 dlens[i] = dlen; 3240 } 3241 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3242 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3243 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3244 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3245 ierr = PetscFree(dlens);CHKERRQ(ierr); 3246 } else { 3247 PetscInt ml,nl; 3248 3249 M = *newmat; 3250 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3251 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3252 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3253 /* 3254 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3255 rather than the slower MatSetValues(). 
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3345 } 3346 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3347 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3348 3349 if (!v) { 3350 ierr = PetscFree(values);CHKERRQ(ierr); 3351 } 3352 PetscFunctionReturn(0); 3353 } 3354 EXTERN_C_END 3355 3356 #undef __FUNCT__ 3357 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3358 /*@ 3359 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3360 (the default parallel PETSc format). 3361 3362 Collective on MPI_Comm 3363 3364 Input Parameters: 3365 + B - the matrix 3366 . i - the indices into j for the start of each local row (starts with zero) 3367 . j - the column indices for each local row (starts with zero) 3368 - v - optional values in the matrix 3369 3370 Level: developer 3371 3372 Notes: 3373 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3374 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3375 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3376 3377 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3378 3379 The format which is used for the sparse matrix input, is equivalent to a 3380 row-major ordering.. i.e for the following matrix, the input data expected is 3381 as shown: 3382 3383 1 0 0 3384 2 0 3 P0 3385 ------- 3386 4 5 6 P1 3387 3388 Process0 [P0]: rows_owned=[0,1] 3389 i = {0,1,3} [size = nrow+1 = 2+1] 3390 j = {0,0,2} [size = nz = 6] 3391 v = {1,2,3} [size = nz = 6] 3392 3393 Process1 [P1]: rows_owned=[2] 3394 i = {0,3} [size = nrow+1 = 1+1] 3395 j = {0,1,2} [size = nz = 6] 3396 v = {4,5,6} [size = nz = 6] 3397 3398 .keywords: matrix, aij, compressed row, sparse, parallel 3399 3400 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3401 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3402 @*/ 3403 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3404 { 3405 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3406 3407 PetscFunctionBegin; 3408 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3409 if (f) { 3410 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3411 } 3412 PetscFunctionReturn(0); 3413 } 3414 3415 #undef __FUNCT__ 3416 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3417 /*@C 3418 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3419 (the default parallel PETSc format). For good matrix assembly performance 3420 the user should preallocate the matrix storage by setting the parameters 3421 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3422 performance can be increased by more than a factor of 50. 3423 3424 Collective on MPI_Comm 3425 3426 Input Parameters: 3427 + A - the matrix 3428 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3429 (same value is used for all local rows) 3430 . d_nnz - array containing the number of nonzeros in the various rows of the 3431 DIAGONAL portion of the local submatrix (possibly different for each row) 3432 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3433 The size of this array is equal to the number of local rows, i.e 'm'. 
3434 You must leave room for the diagonal entry even if it is zero. 3435 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3436 submatrix (same value is used for all local rows). 3437 - o_nnz - array containing the number of nonzeros in the various rows of the 3438 OFF-DIAGONAL portion of the local submatrix (possibly different for 3439 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3440 structure. The size of this array is equal to the number 3441 of local rows, i.e 'm'. 3442 3443 If the *_nnz parameter is given then the *_nz parameter is ignored 3444 3445 The AIJ format (also called the Yale sparse matrix format or 3446 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3447 storage. The stored row and column indices begin with zero. 3448 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3449 3450 The parallel matrix is partitioned such that the first m0 rows belong to 3451 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3452 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3453 3454 The DIAGONAL portion of the local submatrix of a processor can be defined 3455 as the submatrix which is obtained by extraction the part corresponding to 3456 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3457 first row that belongs to the processor, r2 is the last row belonging to 3458 the this processor, and c1-c2 is range of indices of the local part of a 3459 vector suitable for applying the matrix to. This is an mxn matrix. In the 3460 common case of a square matrix, the row and column ranges are the same and 3461 the DIAGONAL part is also square. The remaining portion of the local 3462 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3463 3464 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3465 3466 You can call MatGetInfo() to get information on how effective the preallocation was; 3467 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3468 You can also run with the option -info and look for messages with the string 3469 malloc in them to see if additional memory allocation was needed. 3470 3471 Example usage: 3472 3473 Consider the following 8x8 matrix with 34 non-zero values, that is 3474 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3475 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3476 as follows: 3477 3478 .vb 3479 1 2 0 | 0 3 0 | 0 4 3480 Proc0 0 5 6 | 7 0 0 | 8 0 3481 9 0 10 | 11 0 0 | 12 0 3482 ------------------------------------- 3483 13 0 14 | 15 16 17 | 0 0 3484 Proc1 0 18 0 | 19 20 21 | 0 0 3485 0 0 0 | 22 23 0 | 24 0 3486 ------------------------------------- 3487 Proc2 25 26 27 | 0 0 28 | 29 0 3488 30 0 0 | 31 32 33 | 0 34 3489 .ve 3490 3491 This can be represented as a collection of submatrices as: 3492 3493 .vb 3494 A B C 3495 D E F 3496 G H I 3497 .ve 3498 3499 Where the submatrices A,B,C are owned by proc0, D,E,F are 3500 owned by proc1, G,H,I are owned by proc2. 3501 3502 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3503 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3504 The 'M','N' parameters are 8,8, and have the same values on all procs. 3505 3506 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3507 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3508 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
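
   As a minimal calling sketch (illustrative only; it uses the per-row counts derived
   for proc1 in the example further below, and assumes B has already been created with
   MatCreate(), MatSetSizes() and MatSetType()):

.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve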
3509 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3510 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3511 matrix, ans [DF] as another SeqAIJ matrix. 3512 3513 When d_nz, o_nz parameters are specified, d_nz storage elements are 3514 allocated for every row of the local diagonal submatrix, and o_nz 3515 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3516 One way to choose d_nz and o_nz is to use the max nonzerors per local 3517 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3518 In this case, the values of d_nz,o_nz are: 3519 .vb 3520 proc0 : dnz = 2, o_nz = 2 3521 proc1 : dnz = 3, o_nz = 2 3522 proc2 : dnz = 1, o_nz = 4 3523 .ve 3524 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3525 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3526 for proc3. i.e we are using 12+15+10=37 storage locations to store 3527 34 values. 3528 3529 When d_nnz, o_nnz parameters are specified, the storage is specified 3530 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3531 In the above case the values for d_nnz,o_nnz are: 3532 .vb 3533 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3534 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3535 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3536 .ve 3537 Here the space allocated is sum of all the above values i.e 34, and 3538 hence pre-allocation is perfect. 3539 3540 Level: intermediate 3541 3542 .keywords: matrix, aij, compressed row, sparse, parallel 3543 3544 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3545 MPIAIJ, MatGetInfo() 3546 @*/ 3547 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3548 { 3549 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3550 3551 PetscFunctionBegin; 3552 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3553 if (f) { 3554 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3555 } 3556 PetscFunctionReturn(0); 3557 } 3558 3559 #undef __FUNCT__ 3560 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3561 /*@ 3562 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3563 CSR format the local rows. 3564 3565 Collective on MPI_Comm 3566 3567 Input Parameters: 3568 + comm - MPI communicator 3569 . m - number of local rows (Cannot be PETSC_DECIDE) 3570 . n - This value should be the same as the local size used in creating the 3571 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3572 calculated if N is given) For square matrices n is almost always m. 3573 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3574 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3575 . i - row indices 3576 . j - column indices 3577 - a - matrix values 3578 3579 Output Parameter: 3580 . mat - the matrix 3581 3582 Level: intermediate 3583 3584 Notes: 3585 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3586 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3587 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3588 3589 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
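
   A minimal calling sketch (illustrative only; the arrays shown are the process-0 data
   from the example below, and the second process would make the same call with its own
   m=1 row of data):

.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve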
3590 3591 The format which is used for the sparse matrix input, is equivalent to a 3592 row-major ordering.. i.e for the following matrix, the input data expected is 3593 as shown: 3594 3595 1 0 0 3596 2 0 3 P0 3597 ------- 3598 4 5 6 P1 3599 3600 Process0 [P0]: rows_owned=[0,1] 3601 i = {0,1,3} [size = nrow+1 = 2+1] 3602 j = {0,0,2} [size = nz = 6] 3603 v = {1,2,3} [size = nz = 6] 3604 3605 Process1 [P1]: rows_owned=[2] 3606 i = {0,3} [size = nrow+1 = 1+1] 3607 j = {0,1,2} [size = nz = 6] 3608 v = {4,5,6} [size = nz = 6] 3609 3610 .keywords: matrix, aij, compressed row, sparse, parallel 3611 3612 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3613 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3614 @*/ 3615 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3616 { 3617 PetscErrorCode ierr; 3618 3619 PetscFunctionBegin; 3620 if (i[0]) { 3621 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3622 } 3623 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3624 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3625 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3626 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3627 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3628 PetscFunctionReturn(0); 3629 } 3630 3631 #undef __FUNCT__ 3632 #define __FUNCT__ "MatCreateMPIAIJ" 3633 /*@C 3634 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3635 (the default parallel PETSc format). For good matrix assembly performance 3636 the user should preallocate the matrix storage by setting the parameters 3637 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3638 performance can be increased by more than a factor of 50. 3639 3640 Collective on MPI_Comm 3641 3642 Input Parameters: 3643 + comm - MPI communicator 3644 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3645 This value should be the same as the local size used in creating the 3646 y vector for the matrix-vector product y = Ax. 3647 . n - This value should be the same as the local size used in creating the 3648 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3649 calculated if N is given) For square matrices n is almost always m. 3650 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3651 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3652 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3653 (same value is used for all local rows) 3654 . d_nnz - array containing the number of nonzeros in the various rows of the 3655 DIAGONAL portion of the local submatrix (possibly different for each row) 3656 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3657 The size of this array is equal to the number of local rows, i.e 'm'. 3658 You must leave room for the diagonal entry even if it is zero. 3659 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3660 submatrix (same value is used for all local rows). 
3661 - o_nnz - array containing the number of nonzeros in the various rows of the 3662 OFF-DIAGONAL portion of the local submatrix (possibly different for 3663 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3664 structure. The size of this array is equal to the number 3665 of local rows, i.e 'm'. 3666 3667 Output Parameter: 3668 . A - the matrix 3669 3670 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3671 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3672 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3673 3674 Notes: 3675 If the *_nnz parameter is given then the *_nz parameter is ignored 3676 3677 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3678 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3679 storage requirements for this matrix. 3680 3681 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3682 processor than it must be used on all processors that share the object for 3683 that argument. 3684 3685 The user MUST specify either the local or global matrix dimensions 3686 (possibly both). 3687 3688 The parallel matrix is partitioned across processors such that the 3689 first m0 rows belong to process 0, the next m1 rows belong to 3690 process 1, the next m2 rows belong to process 2 etc.. where 3691 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3692 values corresponding to [m x N] submatrix. 3693 3694 The columns are logically partitioned with the n0 columns belonging 3695 to 0th partition, the next n1 columns belonging to the next 3696 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3697 3698 The DIAGONAL portion of the local submatrix on any given processor 3699 is the submatrix corresponding to the rows and columns m,n 3700 corresponding to the given processor. i.e diagonal matrix on 3701 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3702 etc. The remaining portion of the local submatrix [m x (N-n)] 3703 constitute the OFF-DIAGONAL portion. The example below better 3704 illustrates this concept. 3705 3706 For a square global matrix we define each processor's diagonal portion 3707 to be its local rows and the corresponding columns (a square submatrix); 3708 each processor's off-diagonal portion encompasses the remainder of the 3709 local matrix (a rectangular submatrix). 3710 3711 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3712 3713 When calling this routine with a single process communicator, a matrix of 3714 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3715 type of communicator, use the construction mechanism: 3716 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3717 3718 By default, this format uses inodes (identical nodes) when possible. 3719 We search for consecutive rows with the same nonzero structure, thereby 3720 reusing matrix information to achieve increased efficiency. 3721 3722 Options Database Keys: 3723 + -mat_no_inode - Do not use inodes 3724 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3725 - -mat_aij_oneindex - Internally use indexing starting at 1 3726 rather than 0. Note that when calling MatSetValues(), 3727 the user still MUST index entries starting at 0! 3728 3729 3730 Example usage: 3731 3732 Consider the following 8x8 matrix with 34 non-zero values, that is 3733 assembled across 3 processors. 
   Let's assume that proc0 owns 3 rows, proc1 owns 3 rows, proc2 owns 2 rows. This
   division can be shown as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence the preallocation is perfect.
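
   A minimal calling sketch for this example (illustrative only; shown for proc1, which
   owns rows 3-5, so its local sizes are m=3 and n=3 while the global sizes are M=N=8):

.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     Mat      A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve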
3798 3799 Level: intermediate 3800 3801 .keywords: matrix, aij, compressed row, sparse, parallel 3802 3803 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3804 MPIAIJ, MatCreateMPIAIJWithArrays() 3805 @*/ 3806 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3807 { 3808 PetscErrorCode ierr; 3809 PetscMPIInt size; 3810 3811 PetscFunctionBegin; 3812 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3813 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3814 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3815 if (size > 1) { 3816 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3817 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3818 } else { 3819 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3820 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3821 } 3822 PetscFunctionReturn(0); 3823 } 3824 3825 #undef __FUNCT__ 3826 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3827 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3828 { 3829 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3830 3831 PetscFunctionBegin; 3832 *Ad = a->A; 3833 *Ao = a->B; 3834 *colmap = a->garray; 3835 PetscFunctionReturn(0); 3836 } 3837 3838 #undef __FUNCT__ 3839 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3840 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3841 { 3842 PetscErrorCode ierr; 3843 PetscInt i; 3844 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3845 3846 PetscFunctionBegin; 3847 if (coloring->ctype == IS_COLORING_GLOBAL) { 3848 ISColoringValue *allcolors,*colors; 3849 ISColoring ocoloring; 3850 3851 /* set coloring for diagonal portion */ 3852 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3853 3854 /* set coloring for off-diagonal portion */ 3855 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3856 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3857 for (i=0; i<a->B->cmap->n; i++) { 3858 colors[i] = allcolors[a->garray[i]]; 3859 } 3860 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3861 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3862 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3863 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3864 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3865 ISColoringValue *colors; 3866 PetscInt *larray; 3867 ISColoring ocoloring; 3868 3869 /* set coloring for diagonal portion */ 3870 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3871 for (i=0; i<a->A->cmap->n; i++) { 3872 larray[i] = i + A->cmap->rstart; 3873 } 3874 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3875 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3876 for (i=0; i<a->A->cmap->n; i++) { 3877 colors[i] = coloring->colors[larray[i]]; 3878 } 3879 ierr = PetscFree(larray);CHKERRQ(ierr); 3880 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3881 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3882 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3883 3884 /* set coloring for off-diagonal portion */ 3885 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3886 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3887 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3888 for (i=0; i<a->B->cmap->n; i++) { 3889 colors[i] = coloring->colors[larray[i]]; 3890 } 3891 ierr = PetscFree(larray);CHKERRQ(ierr); 3892 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3893 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3894 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3895 } else { 3896 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3897 } 3898 3899 PetscFunctionReturn(0); 3900 } 3901 3902 #if defined(PETSC_HAVE_ADIC) 3903 #undef __FUNCT__ 3904 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3905 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3906 { 3907 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3908 PetscErrorCode ierr; 3909 3910 PetscFunctionBegin; 3911 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3912 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3913 PetscFunctionReturn(0); 3914 } 3915 #endif 3916 3917 #undef __FUNCT__ 3918 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3919 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3920 { 3921 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3922 PetscErrorCode ierr; 3923 3924 PetscFunctionBegin; 3925 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3926 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3927 PetscFunctionReturn(0); 3928 } 3929 3930 #undef __FUNCT__ 3931 #define __FUNCT__ "MatMerge" 3932 /*@ 3933 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3934 matrices from each processor 3935 3936 Collective on MPI_Comm 3937 3938 Input Parameters: 3939 + comm - the communicators the parallel matrix will live on 3940 . inmat - the input sequential matrices 3941 . n - number of local columns (or PETSC_DECIDE) 3942 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3943 3944 Output Parameter: 3945 . outmat - the parallel matrix generated 3946 3947 Level: advanced 3948 3949 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
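
   A minimal usage sketch (illustrative only; it assumes every process already holds a
   SeqAIJ matrix named local, with the same number of columns on each process):

.vb
     Mat merged;
     ierr = MatMerge(PETSC_COMM_WORLD,local,PETSC_DECIDE,MAT_INITIAL_MATRIX,&merged);CHKERRQ(ierr);
.ve

   The input matrix is destroyed inside this routine (note the MatDestroy(inmat) call in
   the body), so the caller must not use or destroy it afterwards.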
3950 3951 @*/ 3952 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3953 { 3954 PetscErrorCode ierr; 3955 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3956 PetscInt *indx; 3957 PetscScalar *values; 3958 3959 PetscFunctionBegin; 3960 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3961 if (scall == MAT_INITIAL_MATRIX){ 3962 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3963 if (n == PETSC_DECIDE){ 3964 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3965 } 3966 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3967 rstart -= m; 3968 3969 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3970 for (i=0;i<m;i++) { 3971 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3972 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3973 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3974 } 3975 /* This routine will ONLY return MPIAIJ type matrix */ 3976 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3977 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3978 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3979 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3980 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3981 3982 } else if (scall == MAT_REUSE_MATRIX){ 3983 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3984 } else { 3985 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3986 } 3987 3988 for (i=0;i<m;i++) { 3989 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3990 Ii = i + rstart; 3991 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3992 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3993 } 3994 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3995 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3996 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3997 3998 PetscFunctionReturn(0); 3999 } 4000 4001 #undef __FUNCT__ 4002 #define __FUNCT__ "MatFileSplit" 4003 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4004 { 4005 PetscErrorCode ierr; 4006 PetscMPIInt rank; 4007 PetscInt m,N,i,rstart,nnz; 4008 size_t len; 4009 const PetscInt *indx; 4010 PetscViewer out; 4011 char *name; 4012 Mat B; 4013 const PetscScalar *values; 4014 4015 PetscFunctionBegin; 4016 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4017 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4018 /* Should this be the type of the diagonal block of A? 
*/ 4019 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4020 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4021 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4022 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4023 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4024 for (i=0;i<m;i++) { 4025 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4026 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4027 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4028 } 4029 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4030 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4031 4032 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4033 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4034 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4035 sprintf(name,"%s.%d",outfile,rank); 4036 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4037 ierr = PetscFree(name); 4038 ierr = MatView(B,out);CHKERRQ(ierr); 4039 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 4040 ierr = MatDestroy(B);CHKERRQ(ierr); 4041 PetscFunctionReturn(0); 4042 } 4043 4044 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 4045 #undef __FUNCT__ 4046 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4047 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4048 { 4049 PetscErrorCode ierr; 4050 Mat_Merge_SeqsToMPI *merge; 4051 PetscContainer container; 4052 4053 PetscFunctionBegin; 4054 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4055 if (container) { 4056 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4057 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4058 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4059 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4060 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4061 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4062 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4063 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4064 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4065 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4066 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4067 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4068 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4069 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4070 4071 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4072 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4073 } 4074 ierr = PetscFree(merge);CHKERRQ(ierr); 4075 4076 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4077 PetscFunctionReturn(0); 4078 } 4079 4080 #include "../src/mat/utils/freespace.h" 4081 #include "petscbt.h" 4082 4083 #undef __FUNCT__ 4084 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4085 /*@C 4086 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4087 matrices from each processor 4088 4089 Collective on MPI_Comm 4090 4091 Input Parameters: 4092 + comm - the communicators the parallel matrix will live on 4093 . seqmat - the input sequential matrices 4094 . m - number of local rows (or PETSC_DECIDE) 4095 . n - number of local columns (or PETSC_DECIDE) 4096 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4097 4098 Output Parameter: 4099 . mpimat - the parallel matrix generated 4100 4101 Level: advanced 4102 4103 Notes: 4104 The dimensions of the sequential matrix in each processor MUST be the same. 
4105 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4106 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4107 @*/ 4108 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4109 { 4110 PetscErrorCode ierr; 4111 MPI_Comm comm=((PetscObject)mpimat)->comm; 4112 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4113 PetscMPIInt size,rank,taga,*len_s; 4114 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4115 PetscInt proc,m; 4116 PetscInt **buf_ri,**buf_rj; 4117 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4118 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4119 MPI_Request *s_waits,*r_waits; 4120 MPI_Status *status; 4121 MatScalar *aa=a->a; 4122 MatScalar **abuf_r,*ba_i; 4123 Mat_Merge_SeqsToMPI *merge; 4124 PetscContainer container; 4125 4126 PetscFunctionBegin; 4127 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4128 4129 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4130 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4131 4132 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4133 if (container) { 4134 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4135 } 4136 bi = merge->bi; 4137 bj = merge->bj; 4138 buf_ri = merge->buf_ri; 4139 buf_rj = merge->buf_rj; 4140 4141 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4142 owners = merge->rowmap->range; 4143 len_s = merge->len_s; 4144 4145 /* send and recv matrix values */ 4146 /*-----------------------------*/ 4147 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4148 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4149 4150 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4151 for (proc=0,k=0; proc<size; proc++){ 4152 if (!len_s[proc]) continue; 4153 i = owners[proc]; 4154 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4155 k++; 4156 } 4157 4158 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4159 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4160 ierr = PetscFree(status);CHKERRQ(ierr); 4161 4162 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4163 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4164 4165 /* insert mat values of mpimat */ 4166 /*----------------------------*/ 4167 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4168 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4169 4170 for (k=0; k<merge->nrecv; k++){ 4171 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4172 nrows = *(buf_ri_k[k]); 4173 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4174 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4175 } 4176 4177 /* set values of ba */ 4178 m = merge->rowmap->n; 4179 for (i=0; i<m; i++) { 4180 arow = owners[rank] + i; 4181 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4182 bnzi = bi[i+1] - bi[i]; 4183 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4184 4185 /* add local non-zero vals of this proc's seqmat into ba */ 4186 anzi = ai[arow+1] - ai[arow]; 4187 aj = a->j + ai[arow]; 4188 aa = a->a + ai[arow]; 4189 nextaj = 0; 4190 for (j=0; nextaj<anzi; j++){ 4191 if (*(bj_i 
+ j) == aj[nextaj]){ /* bcol == acol */ 4192 ba_i[j] += aa[nextaj++]; 4193 } 4194 } 4195 4196 /* add received vals into ba */ 4197 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4198 /* i-th row */ 4199 if (i == *nextrow[k]) { 4200 anzi = *(nextai[k]+1) - *nextai[k]; 4201 aj = buf_rj[k] + *(nextai[k]); 4202 aa = abuf_r[k] + *(nextai[k]); 4203 nextaj = 0; 4204 for (j=0; nextaj<anzi; j++){ 4205 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4206 ba_i[j] += aa[nextaj++]; 4207 } 4208 } 4209 nextrow[k]++; nextai[k]++; 4210 } 4211 } 4212 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4213 } 4214 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4215 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4216 4217 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4218 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4219 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4220 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4221 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4222 PetscFunctionReturn(0); 4223 } 4224 4225 #undef __FUNCT__ 4226 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4227 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4228 { 4229 PetscErrorCode ierr; 4230 Mat B_mpi; 4231 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4232 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4233 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4234 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4235 PetscInt len,proc,*dnz,*onz; 4236 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4237 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4238 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4239 MPI_Status *status; 4240 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4241 PetscBT lnkbt; 4242 Mat_Merge_SeqsToMPI *merge; 4243 PetscContainer container; 4244 4245 PetscFunctionBegin; 4246 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4247 4248 /* make sure it is a PETSc comm */ 4249 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4250 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4251 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4252 4253 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4254 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4255 4256 /* determine row ownership */ 4257 /*---------------------------------------------------------*/ 4258 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4259 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4260 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4261 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4262 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4263 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4264 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4265 4266 m = merge->rowmap->n; 4267 M = merge->rowmap->N; 4268 owners = merge->rowmap->range; 4269 4270 /* determine the number of messages to send, their lengths */ 4271 /*---------------------------------------------------------*/ 4272 len_s = merge->len_s; 4273 4274 len = 0; /* length of buf_si[] */ 4275 merge->nsend = 0; 4276 for (proc=0; proc<size; proc++){ 4277 len_si[proc] = 0; 4278 if (proc == rank){ 4279 len_s[proc] = 0; 4280 } else { 4281 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4282 
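      /* For each remote process: len_s[proc] is the number of matrix entries (column
         indices, and later numerical values) taken from the locally owned rows that will
         be shipped to [proc]; len_si[proc] is recomputed below as 2*(number of nonempty
         rows)+2, the length of the corresponding i-structure message */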
      len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (global)
         [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = 0;
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
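/*
   Illustrative usage sketch (not part of the original source): each process builds a
   sequential AIJ matrix seqmat holding its contributions, indexed by global row
   numbers, and the pieces are merged into one parallel MPIAIJ matrix.  The names
   mlocal (the number of rows this process owns in the merged matrix) and Ampi are
   hypothetical; the exact layout expected of seqmat follows the symbolic and numeric
   routines above.

     Mat      Ampi;
     PetscInt mlocal = ...;
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,mlocal,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
     ... refill seqmat with new values, same nonzero pattern ...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,mlocal,PETSC_DECIDE,MAT_REUSE_MATRIX,&Ampi);CHKERRQ(ierr);
     ierr = MatDestroy(Ampi);CHKERRQ(ierr);
*/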
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@
   MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows

   Not Collective

   Input Parameters:
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
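/*
   Illustrative usage sketch (not part of the original source): extract this process's
   rows of an assembled MPIAIJ matrix A as a single SeqAIJ matrix with global column
   numbering, then refresh it after the values of A change while the nonzero pattern
   stays fixed.

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... change the values of A, same pattern ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/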
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
   MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns

   Not Collective

   Input Parameters:
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
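/*
   Illustrative usage sketch (not part of the original source): like MatGetLocalMat()
   but the columns are condensed to the nonzero columns of the local part of A; passing
   PETSC_NULL for row and col lets the routine build the index sets itself.

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/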
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+  rowb, colb - index sets of rows and columns of B to extract
.  brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's local rows
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
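/*
   Illustrative usage sketch (not part of the original source): gather, as a sequential
   matrix, the rows of B whose global indices match the nonzero columns of the local
   part of A; the returned rowb/colb index sets allow a later MAT_REUSE_MATRIX call to
   skip the setup when only the values of B have changed.

     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... new values in B, same pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/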
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
   MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
   of the OFF-DIAGONAL portion of local A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.  startsj - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.  startsj_r - similar to startsj, for receives
-  bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)

   Output Parameter:
.  B_oth - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=((PetscObject)ctx)->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;   /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
    sstartsj[0] = 0;  rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k   = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++){
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
          rowlen[j*sbs+l] = ncols;
          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
    } else {
      *startsj   = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

   Not Collective

   Input Parameter:
.  A - The matrix in mpiaij format

   Output Parameters:
+  lvec - The local vector holding off-process values from the argument to a matrix-vector product
.  colmap - A map from global column index to local index into lvec
-  multScatter - A scatter from the argument of a matrix-vector product to lvec

   Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

           n                    p                      p
        (        )          (       )          (              )
      m (   A    )   *    n (   B   )    =   m (      C       )
        (        )          (       )          (              )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
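/*
   Illustrative usage sketch (not part of the original source): the product of an
   MPIDENSE matrix A and an MPIAIJ matrix B is formed through the transpose identity
   C = (B^T A^T)^T implemented above; from user code this case is typically reached
   through the generic interface.

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/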

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                           "MatGetFactor_aij_mumps",
                                           MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                           "MatGetFactor_mpiaij_pastix",
                                           MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
                                           "MatConvert_MPIAIJ_MPIAIJPERM",
                                           MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
                                           "MatConvert_MPIAIJ_MPIAIJCRL",
                                           MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                           "MatConvert_MPIAIJ_MPISBAIJ",
                                           MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
   MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       You cannot later use MatSetValues() to change values in this matrix.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
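/*
   Illustrative usage sketch (not part of the original source): wrap pre-built CSR
   arrays for the "diagonal" and "off-diagonal" blocks of this process's rows; the
   arrays are not copied, so they must stay valid for the lifetime of the matrix.
   The array and size names below are hypothetical, and the split follows the
   MatCreateMPIAIJ() convention referenced in the Notes above.

     PetscInt    *di,*dj,*oi,*oj;   (CSR row offsets and column indices, 0 based)
     PetscScalar *da,*oa;           (the corresponding numerical values)
     PetscInt    mlocal,nlocal;     (local row and column sizes)
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE,
                                           di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/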
/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so can be used in void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  ierr = MatPreallocated(mat);CHKERRQ(ierr);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }}
  PetscFunctionReturnVoid();
}
EXTERN_C_END