1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "petscblaslapack.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatDistribute_MPIAIJ" 8 /* 9 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 10 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 11 12 Only for square matrices 13 */ 14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 15 { 16 PetscMPIInt rank,size; 17 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 18 PetscErrorCode ierr; 19 Mat mat; 20 Mat_SeqAIJ *gmata; 21 PetscMPIInt tag; 22 MPI_Status status; 23 PetscTruth aij; 24 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 25 26 PetscFunctionBegin; 27 CHKMEMQ; 28 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 29 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 30 if (!rank) { 31 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 32 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 33 } 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 36 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 37 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 38 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 39 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 40 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 41 rowners[0] = 0; 42 for (i=2; i<=size; i++) { 43 rowners[i] += rowners[i-1]; 44 } 45 rstart = rowners[rank]; 46 rend = rowners[rank+1]; 47 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 48 if (!rank) { 49 gmata = (Mat_SeqAIJ*) gmat->data; 50 /* send row lengths to all processors */ 51 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 52 for (i=1; i<size; i++) { 53 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 54 } 55 /* determine number diagonal and off-diagonal counts */ 56 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 58 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 59 jj = 0; 60 for (i=0; i<m; i++) { 61 for (j=0; j<dlens[i]; j++) { 62 if (gmata->j[jj] < rstart) ld[i]++; 63 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 64 jj++; 65 } 66 } 67 /* send column indices to other processes */ 68 for (i=1; i<size; i++) { 69 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 70 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 72 } 73 74 /* send numerical values to other processes */ 75 for (i=1; i<size; i++) { 76 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 77 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 78 } 79 gmataa = gmata->a; 80 gmataj = gmata->j; 81 82 } else { 83 /* receive row lengths */ 84 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 85 /* receive column indices */ 86 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 87 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 88 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 89 /* determine number diagonal and off-diagonal counts */ 90 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap->n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap->n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap->n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global column 178 number to the local number in the off-diagonal part of the local 179 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 180 a slightly higher hash table cost; without it it is not scalable (each processor 181 has an order N integer array but is fast to acess. 182 */ 183 #undef __FUNCT__ 184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 188 PetscErrorCode ierr; 189 PetscInt n = aij->B->cmap->n,i; 190 191 PetscFunctionBegin; 192 #if defined (PETSC_USE_CTABLE) 193 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 194 for (i=0; i<n; i++){ 195 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 196 } 197 #else 198 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 199 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 200 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 201 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 202 #endif 203 PetscFunctionReturn(0); 204 } 205 206 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 207 { \ 208 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 209 lastcol1 = col;\ 210 while (high1-low1 > 5) { \ 211 t = (low1+high1)/2; \ 212 if (rp1[t] > col) high1 = t; \ 213 else low1 = t; \ 214 } \ 215 for (_i=low1; _i<high1; _i++) { \ 216 if (rp1[_i] > col) break; \ 217 if (rp1[_i] == col) { \ 218 if (addv == ADD_VALUES) ap1[_i] += value; \ 219 else ap1[_i] = value; \ 220 goto a_noinsert; \ 221 } \ 222 } \ 223 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 224 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 225 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 226 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 227 N = nrow1++ - 1; a->nz++; high1++; \ 228 /* shift up all the later entries in this row */ \ 229 for (ii=N; ii>=_i; ii--) { \ 230 rp1[ii+1] = rp1[ii]; \ 231 ap1[ii+1] = ap1[ii]; \ 232 } \ 233 rp1[_i] = col; \ 234 ap1[_i] = value; \ 235 a_noinsert: ; \ 236 ailen[row] = nrow1; \ 237 } 238 239 240 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 241 { \ 242 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 243 lastcol2 = col;\ 244 while (high2-low2 > 5) { \ 245 t = (low2+high2)/2; \ 246 if (rp2[t] > col) high2 = t; \ 247 else low2 = t; \ 248 } \ 249 for (_i=low2; _i<high2; _i++) { \ 250 if (rp2[_i] > col) break; \ 251 if (rp2[_i] == col) { \ 252 if (addv == ADD_VALUES) ap2[_i] += value; \ 253 else ap2[_i] = value; \ 254 goto b_noinsert; \ 255 } \ 256 } \ 257 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 258 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 259 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 260 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 261 N = nrow2++ - 1; b->nz++; high2++; \ 262 /* shift up all the later entries in this row */ \ 263 for (ii=N; ii>=_i; ii--) { \ 264 rp2[ii+1] = rp2[ii]; \ 265 ap2[ii+1] = ap2[ii]; \ 266 } \ 267 rp2[_i] = col; \ 268 ap2[_i] = value; \ 269 b_noinsert: ; \ 270 bilen[row] = nrow2; \ 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 275 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 276 { 277 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 278 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 279 PetscErrorCode ierr; 280 PetscInt l,*garray = mat->garray,diag; 281 282 PetscFunctionBegin; 283 /* code only works for square matrices A */ 284 285 /* find size of row to the left of the diagonal part */ 286 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 287 row = row - diag; 288 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 289 if (garray[b->j[b->i[row]+l]] > diag) break; 290 } 291 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 292 293 /* diagonal part */ 294 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 295 296 /* right of diagonal part */ 297 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 298 PetscFunctionReturn(0); 299 } 300 301 #undef __FUNCT__ 302 #define __FUNCT__ "MatSetValues_MPIAIJ" 303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 306 PetscScalar value; 307 PetscErrorCode ierr; 308 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 309 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 310 PetscTruth roworiented = aij->roworiented; 311 312 /* Some Variables required in the macro */ 313 Mat A = aij->A; 314 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 315 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 316 MatScalar *aa = a->a; 317 PetscTruth ignorezeroentries = a->ignorezeroentries; 318 Mat B = aij->B; 319 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 320 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 321 MatScalar *ba = b->a; 322 323 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 324 PetscInt nonew = a->nonew; 325 MatScalar *ap1,*ap2; 326 327 PetscFunctionBegin; 328 if (v) PetscValidScalarPointer(v,6); 329 for (i=0; i<m; i++) { 330 if (im[i] < 0) continue; 331 #if defined(PETSC_USE_DEBUG) 332 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 333 #endif 334 if (im[i] >= rstart && im[i] < rend) { 335 row = im[i] - rstart; 336 lastcol1 = -1; 337 rp1 = aj + ai[row]; 338 ap1 = aa + ai[row]; 339 rmax1 = aimax[row]; 340 nrow1 = ailen[row]; 341 low1 = 0; 342 high1 = nrow1; 343 lastcol2 = -1; 344 rp2 = bj + bi[row]; 345 ap2 = ba + bi[row]; 346 rmax2 = bimax[row]; 347 nrow2 = bilen[row]; 348 low2 = 0; 349 high2 = nrow2; 350 351 for (j=0; j<n; j++) { 352 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 353 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 354 if (in[j] >= cstart && in[j] < cend){ 355 col = in[j] - cstart; 356 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 357 } else if (in[j] < 0) continue; 358 #if defined(PETSC_USE_DEBUG) 359 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 360 #endif 361 else { 362 if (mat->was_assembled) { 363 if (!aij->colmap) { 364 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 365 } 366 #if defined (PETSC_USE_CTABLE) 367 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 368 col--; 369 #else 370 col = aij->colmap[in[j]] - 1; 371 #endif 372 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 373 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 374 col = in[j]; 375 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 376 B = aij->B; 377 b = (Mat_SeqAIJ*)B->data; 378 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 379 rp2 = bj + bi[row]; 380 ap2 = ba + bi[row]; 381 rmax2 = bimax[row]; 382 nrow2 = bilen[row]; 383 low2 = 0; 384 high2 = nrow2; 385 bm = aij->B->rmap->n; 386 ba = b->a; 387 } 388 } else col = in[j]; 389 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 390 } 391 } 392 } else { 393 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 394 if (!aij->donotstash) { 395 if (roworiented) { 396 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 397 } else { 398 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 399 } 400 } 401 } 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatGetValues_MPIAIJ" 408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 413 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 414 415 PetscFunctionBegin; 416 for (i=0; i<m; i++) { 417 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 418 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 419 if (idxm[i] >= rstart && idxm[i] < rend) { 420 row = idxm[i] - rstart; 421 for (j=0; j<n; j++) { 422 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 423 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 424 if (idxn[j] >= cstart && idxn[j] < cend){ 425 col = idxn[j] - cstart; 426 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 427 } else { 428 if (!aij->colmap) { 429 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 430 } 431 #if defined (PETSC_USE_CTABLE) 432 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 433 col --; 434 #else 435 col = aij->colmap[idxn[j]] - 1; 436 #endif 437 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 438 else { 439 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 440 } 441 } 442 } 443 } else { 444 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 445 } 446 } 447 PetscFunctionReturn(0); 448 } 449 450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash || mat->nooffprocentries) { 463 PetscFunctionReturn(0); 464 } 465 466 /* make sure all processors are either in INSERTMODE or ADDMODE */ 467 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 468 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 469 mat->insertmode = addv; /* in case this processor had no cache */ 470 471 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 472 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 473 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 474 PetscFunctionReturn(0); 475 } 476 477 #undef __FUNCT__ 478 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 479 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 480 { 481 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 482 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 483 PetscErrorCode ierr; 484 PetscMPIInt n; 485 PetscInt i,j,rstart,ncols,flg; 486 PetscInt *row,*col; 487 PetscTruth other_disassembled; 488 PetscScalar *val; 489 InsertMode addv = mat->insertmode; 490 491 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 492 PetscFunctionBegin; 493 if (!aij->donotstash && !mat->nooffprocentries) { 494 while (1) { 495 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 496 if (!flg) break; 497 498 for (i=0; i<n;) { 499 /* Now identify the consecutive vals belonging to the same row */ 500 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 501 if (j < n) ncols = j-i; 502 else ncols = n-i; 503 /* Now assemble all these values with a single function call */ 504 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 505 i = j; 506 } 507 } 508 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 509 } 510 a->compressedrow.use = PETSC_FALSE; 511 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 512 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 513 514 /* determine if any processor has disassembled, if so we must 515 also disassemble ourselfs, in order that we may reassemble. */ 516 /* 517 if nonzero structure of submatrix B cannot change then we know that 518 no processor disassembled thus we can skip this stuff 519 */ 520 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 521 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 522 if (mat->was_assembled && !other_disassembled) { 523 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 524 } 525 } 526 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 527 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 528 } 529 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 530 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 531 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 532 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 533 534 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 535 aij->rowvalues = 0; 536 537 /* used by MatAXPY() */ 538 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 539 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 540 541 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 542 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 549 { 550 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 555 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatZeroRows_MPIAIJ" 561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 562 { 563 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 564 PetscErrorCode ierr; 565 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 566 PetscInt i,*owners = A->rmap->range; 567 PetscInt *nprocs,j,idx,nsends,row; 568 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 569 PetscInt *rvalues,count,base,slen,*source; 570 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 571 MPI_Comm comm = ((PetscObject)A)->comm; 572 MPI_Request *send_waits,*recv_waits; 573 MPI_Status recv_status,*send_status; 574 #if defined(PETSC_DEBUG) 575 PetscTruth found = PETSC_FALSE; 576 #endif 577 578 PetscFunctionBegin; 579 /* first count number of contributors to each processor */ 580 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 581 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 582 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 583 j = 0; 584 for (i=0; i<N; i++) { 585 if (lastidx > (idx = rows[i])) j = 0; 586 lastidx = idx; 587 for (; j<size; j++) { 588 if (idx >= owners[j] && idx < owners[j+1]) { 589 nprocs[2*j]++; 590 nprocs[2*j+1] = 1; 591 owner[i] = j; 592 #if defined(PETSC_DEBUG) 593 found = PETSC_TRUE; 594 #endif 595 break; 596 } 597 } 598 #if defined(PETSC_DEBUG) 599 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 600 found = PETSC_FALSE; 601 #endif 602 } 603 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 604 605 /* inform other processors of number of messages and max length*/ 606 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 607 608 /* post receives: */ 609 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 610 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 611 for (i=0; i<nrecvs; i++) { 612 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 613 } 614 615 /* do sends: 616 1) starts[i] gives the starting index in svalues for stuff going to 617 the ith processor 618 */ 619 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 620 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 621 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 622 starts[0] = 0; 623 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 624 for (i=0; i<N; i++) { 625 svalues[starts[owner[i]]++] = rows[i]; 626 } 627 628 starts[0] = 0; 629 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 630 count = 0; 631 for (i=0; i<size; i++) { 632 if (nprocs[2*i+1]) { 633 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 634 } 635 } 636 ierr = PetscFree(starts);CHKERRQ(ierr); 637 638 base = owners[rank]; 639 640 /* wait on receives */ 641 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 642 count = nrecvs; slen = 0; 643 while (count) { 644 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 645 /* unpack receives into our local space */ 646 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 647 source[imdex] = recv_status.MPI_SOURCE; 648 lens[imdex] = n; 649 slen += n; 650 count--; 651 } 652 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 653 654 /* move the data into the send scatter */ 655 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 656 count = 0; 657 for (i=0; i<nrecvs; i++) { 658 values = rvalues + i*nmax; 659 for (j=0; j<lens[i]; j++) { 660 lrows[count++] = values[j] - base; 661 } 662 } 663 ierr = PetscFree(rvalues);CHKERRQ(ierr); 664 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 665 ierr = PetscFree(owner);CHKERRQ(ierr); 666 ierr = PetscFree(nprocs);CHKERRQ(ierr); 667 668 /* actually zap the local rows */ 669 /* 670 Zero the required rows. If the "diagonal block" of the matrix 671 is square and the user wishes to set the diagonal we use separate 672 code so that MatSetValues() is not called for each diagonal allocating 673 new memory, thus calling lots of mallocs and slowing things down. 674 675 */ 676 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 677 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 678 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 679 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 680 } else if (diag != 0.0) { 681 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 682 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 683 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 684 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 685 } 686 for (i = 0; i < slen; i++) { 687 row = lrows[i] + rstart; 688 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 689 } 690 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 691 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 692 } else { 693 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 694 } 695 ierr = PetscFree(lrows);CHKERRQ(ierr); 696 697 /* wait on sends */ 698 if (nsends) { 699 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 700 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 701 ierr = PetscFree(send_status);CHKERRQ(ierr); 702 } 703 ierr = PetscFree(send_waits);CHKERRQ(ierr); 704 ierr = PetscFree(svalues);CHKERRQ(ierr); 705 706 PetscFunctionReturn(0); 707 } 708 709 #undef __FUNCT__ 710 #define __FUNCT__ "MatMult_MPIAIJ" 711 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 712 { 713 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 714 PetscErrorCode ierr; 715 PetscInt nt; 716 717 PetscFunctionBegin; 718 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 719 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 720 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 721 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 722 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 723 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 729 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 730 { 731 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 732 PetscErrorCode ierr; 733 734 PetscFunctionBegin; 735 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatMultAdd_MPIAIJ" 741 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 742 { 743 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 744 PetscErrorCode ierr; 745 746 PetscFunctionBegin; 747 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 748 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 749 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 750 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 751 PetscFunctionReturn(0); 752 } 753 754 #undef __FUNCT__ 755 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 756 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 757 { 758 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 759 PetscErrorCode ierr; 760 PetscTruth merged; 761 762 PetscFunctionBegin; 763 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 764 /* do nondiagonal part */ 765 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 766 if (!merged) { 767 /* send it on its way */ 768 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 769 /* do local part */ 770 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 771 /* receive remote parts: note this assumes the values are not actually */ 772 /* added in yy until the next line, */ 773 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 774 } else { 775 /* do local part */ 776 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 777 /* send it on its way */ 778 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 779 /* values actually were received in the Begin() but we need to call this nop */ 780 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 781 } 782 PetscFunctionReturn(0); 783 } 784 785 EXTERN_C_BEGIN 786 #undef __FUNCT__ 787 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 788 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 789 { 790 MPI_Comm comm; 791 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 792 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 793 IS Me,Notme; 794 PetscErrorCode ierr; 795 PetscInt M,N,first,last,*notme,i; 796 PetscMPIInt size; 797 798 PetscFunctionBegin; 799 800 /* Easy test: symmetric diagonal block */ 801 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 802 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 803 if (!*f) PetscFunctionReturn(0); 804 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 805 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 806 if (size == 1) PetscFunctionReturn(0); 807 808 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 809 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 810 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 811 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 812 for (i=0; i<first; i++) notme[i] = i; 813 for (i=last; i<M; i++) notme[i-last+first] = i; 814 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 815 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 816 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 817 Aoff = Aoffs[0]; 818 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 819 Boff = Boffs[0]; 820 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 821 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 822 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 823 ierr = ISDestroy(Me);CHKERRQ(ierr); 824 ierr = ISDestroy(Notme);CHKERRQ(ierr); 825 826 PetscFunctionReturn(0); 827 } 828 EXTERN_C_END 829 830 #undef __FUNCT__ 831 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 832 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 833 { 834 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 835 PetscErrorCode ierr; 836 837 PetscFunctionBegin; 838 /* do nondiagonal part */ 839 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 840 /* send it on its way */ 841 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 842 /* do local part */ 843 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 844 /* receive remote parts */ 845 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 846 PetscFunctionReturn(0); 847 } 848 849 /* 850 This only works correctly for square matrices where the subblock A->A is the 851 diagonal block 852 */ 853 #undef __FUNCT__ 854 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 855 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 856 { 857 PetscErrorCode ierr; 858 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 859 860 PetscFunctionBegin; 861 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 862 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 863 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 864 PetscFunctionReturn(0); 865 } 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatScale_MPIAIJ" 869 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 870 { 871 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 872 PetscErrorCode ierr; 873 874 PetscFunctionBegin; 875 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 876 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 877 PetscFunctionReturn(0); 878 } 879 880 #undef __FUNCT__ 881 #define __FUNCT__ "MatDestroy_MPIAIJ" 882 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 883 { 884 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 885 PetscErrorCode ierr; 886 887 PetscFunctionBegin; 888 #if defined(PETSC_USE_LOG) 889 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 890 #endif 891 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 892 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 893 if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);} 894 if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);} 895 #if defined (PETSC_USE_CTABLE) 896 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 897 #else 898 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 899 #endif 900 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 901 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 902 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 903 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 904 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 905 ierr = PetscFree(aij);CHKERRQ(ierr); 906 907 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 908 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 909 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 910 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 911 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 912 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 913 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 914 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 915 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 916 PetscFunctionReturn(0); 917 } 918 919 #undef __FUNCT__ 920 #define __FUNCT__ "MatView_MPIAIJ_Binary" 921 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 922 { 923 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 924 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 925 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 926 PetscErrorCode ierr; 927 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 928 int fd; 929 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 930 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 931 PetscScalar *column_values; 932 933 PetscFunctionBegin; 934 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 935 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 936 nz = A->nz + B->nz; 937 if (!rank) { 938 header[0] = MAT_FILE_CLASSID; 939 header[1] = mat->rmap->N; 940 header[2] = mat->cmap->N; 941 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 942 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 943 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 944 /* get largest number of rows any processor has */ 945 rlen = mat->rmap->n; 946 range = mat->rmap->range; 947 for (i=1; i<size; i++) { 948 rlen = PetscMax(rlen,range[i+1] - range[i]); 949 } 950 } else { 951 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 952 rlen = mat->rmap->n; 953 } 954 955 /* load up the local row counts */ 956 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 957 for (i=0; i<mat->rmap->n; i++) { 958 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 959 } 960 961 /* store the row lengths to the file */ 962 if (!rank) { 963 MPI_Status status; 964 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 965 for (i=1; i<size; i++) { 966 rlen = range[i+1] - range[i]; 967 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 968 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 969 } 970 } else { 971 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 972 } 973 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 974 975 /* load up the local column indices */ 976 nzmax = nz; /* )th processor needs space a largest processor needs */ 977 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 978 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 979 cnt = 0; 980 for (i=0; i<mat->rmap->n; i++) { 981 for (j=B->i[i]; j<B->i[i+1]; j++) { 982 if ( (col = garray[B->j[j]]) > cstart) break; 983 column_indices[cnt++] = col; 984 } 985 for (k=A->i[i]; k<A->i[i+1]; k++) { 986 column_indices[cnt++] = A->j[k] + cstart; 987 } 988 for (; j<B->i[i+1]; j++) { 989 column_indices[cnt++] = garray[B->j[j]]; 990 } 991 } 992 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 993 994 /* store the column indices to the file */ 995 if (!rank) { 996 MPI_Status status; 997 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 998 for (i=1; i<size; i++) { 999 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1000 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1001 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1002 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1003 } 1004 } else { 1005 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1006 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1007 } 1008 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1009 1010 /* load up the local column values */ 1011 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1012 cnt = 0; 1013 for (i=0; i<mat->rmap->n; i++) { 1014 for (j=B->i[i]; j<B->i[i+1]; j++) { 1015 if ( garray[B->j[j]] > cstart) break; 1016 column_values[cnt++] = B->a[j]; 1017 } 1018 for (k=A->i[i]; k<A->i[i+1]; k++) { 1019 column_values[cnt++] = A->a[k]; 1020 } 1021 for (; j<B->i[i+1]; j++) { 1022 column_values[cnt++] = B->a[j]; 1023 } 1024 } 1025 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1026 1027 /* store the column values to the file */ 1028 if (!rank) { 1029 MPI_Status status; 1030 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1031 for (i=1; i<size; i++) { 1032 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1033 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1034 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1035 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1036 } 1037 } else { 1038 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1039 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1040 } 1041 ierr = PetscFree(column_values);CHKERRQ(ierr); 1042 PetscFunctionReturn(0); 1043 } 1044 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1047 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1048 { 1049 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1050 PetscErrorCode ierr; 1051 PetscMPIInt rank = aij->rank,size = aij->size; 1052 PetscTruth isdraw,iascii,isbinary; 1053 PetscViewer sviewer; 1054 PetscViewerFormat format; 1055 1056 PetscFunctionBegin; 1057 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1058 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1059 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1060 if (iascii) { 1061 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1062 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1063 MatInfo info; 1064 PetscTruth inodes; 1065 1066 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1067 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1068 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1069 if (!inodes) { 1070 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1071 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1072 } else { 1073 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1074 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1075 } 1076 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1077 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1078 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1079 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1080 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1081 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1082 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1083 PetscFunctionReturn(0); 1084 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1085 PetscInt inodecount,inodelimit,*inodes; 1086 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1087 if (inodes) { 1088 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1089 } else { 1090 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1091 } 1092 PetscFunctionReturn(0); 1093 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1094 PetscFunctionReturn(0); 1095 } 1096 } else if (isbinary) { 1097 if (size == 1) { 1098 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1099 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1100 } else { 1101 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1102 } 1103 PetscFunctionReturn(0); 1104 } else if (isdraw) { 1105 PetscDraw draw; 1106 PetscTruth isnull; 1107 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1108 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1109 } 1110 1111 if (size == 1) { 1112 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1113 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1114 } else { 1115 /* assemble the entire matrix onto first processor. */ 1116 Mat A; 1117 Mat_SeqAIJ *Aloc; 1118 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1119 MatScalar *a; 1120 1121 if (mat->rmap->N > 1024) { 1122 PetscTruth flg = PETSC_FALSE; 1123 1124 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1125 if (!flg) { 1126 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1127 } 1128 } 1129 1130 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1131 if (!rank) { 1132 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1133 } else { 1134 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1135 } 1136 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1137 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1138 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1139 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1140 1141 /* copy over the A part */ 1142 Aloc = (Mat_SeqAIJ*)aij->A->data; 1143 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1144 row = mat->rmap->rstart; 1145 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1146 for (i=0; i<m; i++) { 1147 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1148 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1149 } 1150 aj = Aloc->j; 1151 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1152 1153 /* copy over the B part */ 1154 Aloc = (Mat_SeqAIJ*)aij->B->data; 1155 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1156 row = mat->rmap->rstart; 1157 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1158 ct = cols; 1159 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1160 for (i=0; i<m; i++) { 1161 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1162 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1163 } 1164 ierr = PetscFree(ct);CHKERRQ(ierr); 1165 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1166 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1167 /* 1168 Everyone has to call to draw the matrix since the graphics waits are 1169 synchronized across all processors that share the PetscDraw object 1170 */ 1171 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1172 if (!rank) { 1173 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1174 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1175 } 1176 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1177 ierr = MatDestroy(A);CHKERRQ(ierr); 1178 } 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "MatView_MPIAIJ" 1184 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1185 { 1186 PetscErrorCode ierr; 1187 PetscTruth iascii,isdraw,issocket,isbinary; 1188 1189 PetscFunctionBegin; 1190 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1191 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1192 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1193 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1194 if (iascii || isdraw || isbinary || issocket) { 1195 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1196 } else { 1197 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1198 } 1199 PetscFunctionReturn(0); 1200 } 1201 1202 #undef __FUNCT__ 1203 #define __FUNCT__ "MatSOR_MPIAIJ" 1204 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1205 { 1206 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1207 PetscErrorCode ierr; 1208 Vec bb1 = 0; 1209 PetscTruth hasop; 1210 1211 PetscFunctionBegin; 1212 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1213 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1214 } 1215 1216 if (flag == SOR_APPLY_UPPER) { 1217 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1218 PetscFunctionReturn(0); 1219 } 1220 1221 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1222 if (flag & SOR_ZERO_INITIAL_GUESS) { 1223 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1224 its--; 1225 } 1226 1227 while (its--) { 1228 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230 1231 /* update rhs: bb1 = bb - B*x */ 1232 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1233 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1234 1235 /* local sweep */ 1236 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1237 } 1238 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1239 if (flag & SOR_ZERO_INITIAL_GUESS) { 1240 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1241 its--; 1242 } 1243 while (its--) { 1244 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 1247 /* update rhs: bb1 = bb - B*x */ 1248 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1249 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1250 1251 /* local sweep */ 1252 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1253 } 1254 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1255 if (flag & SOR_ZERO_INITIAL_GUESS) { 1256 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1257 its--; 1258 } 1259 while (its--) { 1260 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1261 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1262 1263 /* update rhs: bb1 = bb - B*x */ 1264 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1265 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1266 1267 /* local sweep */ 1268 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1269 } 1270 } else if (flag & SOR_EISENSTAT) { 1271 Vec xx1; 1272 1273 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1274 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1275 1276 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1277 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1278 if (!mat->diag) { 1279 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1280 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1281 } 1282 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1283 if (hasop) { 1284 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1285 } else { 1286 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1287 } 1288 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1289 1290 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1291 1292 /* local sweep */ 1293 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1294 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1295 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1296 } else { 1297 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1298 } 1299 1300 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "MatPermute_MPIAIJ" 1306 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1307 { 1308 MPI_Comm comm,pcomm; 1309 PetscInt first,local_size,nrows; 1310 const PetscInt *rows; 1311 PetscMPIInt size; 1312 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1313 PetscErrorCode ierr; 1314 1315 PetscFunctionBegin; 1316 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1317 /* make a collective version of 'rowp' */ 1318 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1319 if (pcomm==comm) { 1320 crowp = rowp; 1321 } else { 1322 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1323 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1324 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1325 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1326 } 1327 /* collect the global row permutation and invert it */ 1328 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1329 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1330 if (pcomm!=comm) { 1331 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1332 } 1333 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1334 /* get the local target indices */ 1335 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1336 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1337 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1338 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1339 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1340 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1341 /* the column permutation is so much easier; 1342 make a local version of 'colp' and invert it */ 1343 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1344 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1345 if (size==1) { 1346 lcolp = colp; 1347 } else { 1348 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1349 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1350 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1351 } 1352 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1353 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1354 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1355 if (size>1) { 1356 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1357 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1358 } 1359 /* now we just get the submatrix */ 1360 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1361 /* clean up */ 1362 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1363 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1369 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1370 { 1371 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1372 Mat A = mat->A,B = mat->B; 1373 PetscErrorCode ierr; 1374 PetscReal isend[5],irecv[5]; 1375 1376 PetscFunctionBegin; 1377 info->block_size = 1.0; 1378 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1379 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1380 isend[3] = info->memory; isend[4] = info->mallocs; 1381 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1382 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1383 isend[3] += info->memory; isend[4] += info->mallocs; 1384 if (flag == MAT_LOCAL) { 1385 info->nz_used = isend[0]; 1386 info->nz_allocated = isend[1]; 1387 info->nz_unneeded = isend[2]; 1388 info->memory = isend[3]; 1389 info->mallocs = isend[4]; 1390 } else if (flag == MAT_GLOBAL_MAX) { 1391 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1392 info->nz_used = irecv[0]; 1393 info->nz_allocated = irecv[1]; 1394 info->nz_unneeded = irecv[2]; 1395 info->memory = irecv[3]; 1396 info->mallocs = irecv[4]; 1397 } else if (flag == MAT_GLOBAL_SUM) { 1398 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1399 info->nz_used = irecv[0]; 1400 info->nz_allocated = irecv[1]; 1401 info->nz_unneeded = irecv[2]; 1402 info->memory = irecv[3]; 1403 info->mallocs = irecv[4]; 1404 } 1405 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1406 info->fill_ratio_needed = 0; 1407 info->factor_mallocs = 0; 1408 1409 PetscFunctionReturn(0); 1410 } 1411 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatSetOption_MPIAIJ" 1414 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1415 { 1416 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1417 PetscErrorCode ierr; 1418 1419 PetscFunctionBegin; 1420 switch (op) { 1421 case MAT_NEW_NONZERO_LOCATIONS: 1422 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1423 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1424 case MAT_KEEP_NONZERO_PATTERN: 1425 case MAT_NEW_NONZERO_LOCATION_ERR: 1426 case MAT_USE_INODES: 1427 case MAT_IGNORE_ZERO_ENTRIES: 1428 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1429 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1430 break; 1431 case MAT_ROW_ORIENTED: 1432 a->roworiented = flg; 1433 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1434 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1435 break; 1436 case MAT_NEW_DIAGONALS: 1437 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1438 break; 1439 case MAT_IGNORE_OFF_PROC_ENTRIES: 1440 a->donotstash = PETSC_TRUE; 1441 break; 1442 case MAT_SPD: 1443 A->spd_set = PETSC_TRUE; 1444 A->spd = flg; 1445 if (flg) { 1446 A->symmetric = PETSC_TRUE; 1447 A->structurally_symmetric = PETSC_TRUE; 1448 A->symmetric_set = PETSC_TRUE; 1449 A->structurally_symmetric_set = PETSC_TRUE; 1450 } 1451 break; 1452 case MAT_SYMMETRIC: 1453 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1454 break; 1455 case MAT_STRUCTURALLY_SYMMETRIC: 1456 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1457 break; 1458 case MAT_HERMITIAN: 1459 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1460 break; 1461 case MAT_SYMMETRY_ETERNAL: 1462 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1463 break; 1464 default: 1465 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1466 } 1467 PetscFunctionReturn(0); 1468 } 1469 1470 #undef __FUNCT__ 1471 #define __FUNCT__ "MatGetRow_MPIAIJ" 1472 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1473 { 1474 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1475 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1476 PetscErrorCode ierr; 1477 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1478 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1479 PetscInt *cmap,*idx_p; 1480 1481 PetscFunctionBegin; 1482 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1483 mat->getrowactive = PETSC_TRUE; 1484 1485 if (!mat->rowvalues && (idx || v)) { 1486 /* 1487 allocate enough space to hold information from the longest row. 1488 */ 1489 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1490 PetscInt max = 1,tmp; 1491 for (i=0; i<matin->rmap->n; i++) { 1492 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1493 if (max < tmp) { max = tmp; } 1494 } 1495 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1496 } 1497 1498 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1499 lrow = row - rstart; 1500 1501 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1502 if (!v) {pvA = 0; pvB = 0;} 1503 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1504 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1505 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1506 nztot = nzA + nzB; 1507 1508 cmap = mat->garray; 1509 if (v || idx) { 1510 if (nztot) { 1511 /* Sort by increasing column numbers, assuming A and B already sorted */ 1512 PetscInt imark = -1; 1513 if (v) { 1514 *v = v_p = mat->rowvalues; 1515 for (i=0; i<nzB; i++) { 1516 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1517 else break; 1518 } 1519 imark = i; 1520 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1521 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1522 } 1523 if (idx) { 1524 *idx = idx_p = mat->rowindices; 1525 if (imark > -1) { 1526 for (i=0; i<imark; i++) { 1527 idx_p[i] = cmap[cworkB[i]]; 1528 } 1529 } else { 1530 for (i=0; i<nzB; i++) { 1531 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1532 else break; 1533 } 1534 imark = i; 1535 } 1536 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1537 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1538 } 1539 } else { 1540 if (idx) *idx = 0; 1541 if (v) *v = 0; 1542 } 1543 } 1544 *nz = nztot; 1545 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1546 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1547 PetscFunctionReturn(0); 1548 } 1549 1550 #undef __FUNCT__ 1551 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1552 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1553 { 1554 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1555 1556 PetscFunctionBegin; 1557 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1558 aij->getrowactive = PETSC_FALSE; 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatNorm_MPIAIJ" 1564 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1565 { 1566 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1567 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1568 PetscErrorCode ierr; 1569 PetscInt i,j,cstart = mat->cmap->rstart; 1570 PetscReal sum = 0.0; 1571 MatScalar *v; 1572 1573 PetscFunctionBegin; 1574 if (aij->size == 1) { 1575 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1576 } else { 1577 if (type == NORM_FROBENIUS) { 1578 v = amat->a; 1579 for (i=0; i<amat->nz; i++) { 1580 #if defined(PETSC_USE_COMPLEX) 1581 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1582 #else 1583 sum += (*v)*(*v); v++; 1584 #endif 1585 } 1586 v = bmat->a; 1587 for (i=0; i<bmat->nz; i++) { 1588 #if defined(PETSC_USE_COMPLEX) 1589 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1590 #else 1591 sum += (*v)*(*v); v++; 1592 #endif 1593 } 1594 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1595 *norm = sqrt(*norm); 1596 } else if (type == NORM_1) { /* max column norm */ 1597 PetscReal *tmp,*tmp2; 1598 PetscInt *jj,*garray = aij->garray; 1599 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1600 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1601 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1602 *norm = 0.0; 1603 v = amat->a; jj = amat->j; 1604 for (j=0; j<amat->nz; j++) { 1605 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1606 } 1607 v = bmat->a; jj = bmat->j; 1608 for (j=0; j<bmat->nz; j++) { 1609 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1610 } 1611 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1612 for (j=0; j<mat->cmap->N; j++) { 1613 if (tmp2[j] > *norm) *norm = tmp2[j]; 1614 } 1615 ierr = PetscFree(tmp);CHKERRQ(ierr); 1616 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1617 } else if (type == NORM_INFINITY) { /* max row norm */ 1618 PetscReal ntemp = 0.0; 1619 for (j=0; j<aij->A->rmap->n; j++) { 1620 v = amat->a + amat->i[j]; 1621 sum = 0.0; 1622 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1623 sum += PetscAbsScalar(*v); v++; 1624 } 1625 v = bmat->a + bmat->i[j]; 1626 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1627 sum += PetscAbsScalar(*v); v++; 1628 } 1629 if (sum > ntemp) ntemp = sum; 1630 } 1631 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1632 } else { 1633 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1634 } 1635 } 1636 PetscFunctionReturn(0); 1637 } 1638 1639 #undef __FUNCT__ 1640 #define __FUNCT__ "MatTranspose_MPIAIJ" 1641 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1642 { 1643 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1644 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1645 PetscErrorCode ierr; 1646 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1647 PetscInt cstart=A->cmap->rstart,ncol; 1648 Mat B; 1649 MatScalar *array; 1650 1651 PetscFunctionBegin; 1652 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1653 1654 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1655 ai = Aloc->i; aj = Aloc->j; 1656 bi = Bloc->i; bj = Bloc->j; 1657 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1658 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1659 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1660 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1661 for (i=0; i<ai[ma]; i++){ 1662 d_nnz[aj[i]] ++; 1663 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1664 } 1665 1666 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1667 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1668 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1669 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1670 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1671 } else { 1672 B = *matout; 1673 } 1674 1675 /* copy over the A part */ 1676 array = Aloc->a; 1677 row = A->rmap->rstart; 1678 for (i=0; i<ma; i++) { 1679 ncol = ai[i+1]-ai[i]; 1680 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1681 row++; array += ncol; aj += ncol; 1682 } 1683 aj = Aloc->j; 1684 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1685 1686 /* copy over the B part */ 1687 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1688 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1689 array = Bloc->a; 1690 row = A->rmap->rstart; 1691 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1692 cols_tmp = cols; 1693 for (i=0; i<mb; i++) { 1694 ncol = bi[i+1]-bi[i]; 1695 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1696 row++; array += ncol; cols_tmp += ncol; 1697 } 1698 ierr = PetscFree(cols);CHKERRQ(ierr); 1699 1700 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1701 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1702 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1703 *matout = B; 1704 } else { 1705 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1706 } 1707 PetscFunctionReturn(0); 1708 } 1709 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1712 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1713 { 1714 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1715 Mat a = aij->A,b = aij->B; 1716 PetscErrorCode ierr; 1717 PetscInt s1,s2,s3; 1718 1719 PetscFunctionBegin; 1720 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1721 if (rr) { 1722 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1723 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1724 /* Overlap communication with computation. */ 1725 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1726 } 1727 if (ll) { 1728 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1729 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1730 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1731 } 1732 /* scale the diagonal block */ 1733 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1734 1735 if (rr) { 1736 /* Do a scatter end and then right scale the off-diagonal block */ 1737 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1738 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1739 } 1740 1741 PetscFunctionReturn(0); 1742 } 1743 1744 #undef __FUNCT__ 1745 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1746 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1747 { 1748 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1749 PetscErrorCode ierr; 1750 1751 PetscFunctionBegin; 1752 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1753 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1754 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1755 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1756 PetscFunctionReturn(0); 1757 } 1758 #undef __FUNCT__ 1759 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1760 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1761 { 1762 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1763 PetscErrorCode ierr; 1764 1765 PetscFunctionBegin; 1766 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1767 PetscFunctionReturn(0); 1768 } 1769 1770 #undef __FUNCT__ 1771 #define __FUNCT__ "MatEqual_MPIAIJ" 1772 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1773 { 1774 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1775 Mat a,b,c,d; 1776 PetscTruth flg; 1777 PetscErrorCode ierr; 1778 1779 PetscFunctionBegin; 1780 a = matA->A; b = matA->B; 1781 c = matB->A; d = matB->B; 1782 1783 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1784 if (flg) { 1785 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1786 } 1787 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1788 PetscFunctionReturn(0); 1789 } 1790 1791 #undef __FUNCT__ 1792 #define __FUNCT__ "MatCopy_MPIAIJ" 1793 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1794 { 1795 PetscErrorCode ierr; 1796 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1797 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1798 1799 PetscFunctionBegin; 1800 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1801 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1802 /* because of the column compression in the off-processor part of the matrix a->B, 1803 the number of columns in a->B and b->B may be different, hence we cannot call 1804 the MatCopy() directly on the two parts. If need be, we can provide a more 1805 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1806 then copying the submatrices */ 1807 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1808 } else { 1809 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1810 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1811 } 1812 PetscFunctionReturn(0); 1813 } 1814 1815 #undef __FUNCT__ 1816 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1817 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1818 { 1819 PetscErrorCode ierr; 1820 1821 PetscFunctionBegin; 1822 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1823 PetscFunctionReturn(0); 1824 } 1825 1826 #undef __FUNCT__ 1827 #define __FUNCT__ "MatAXPY_MPIAIJ" 1828 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1829 { 1830 PetscErrorCode ierr; 1831 PetscInt i; 1832 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1833 PetscBLASInt bnz,one=1; 1834 Mat_SeqAIJ *x,*y; 1835 1836 PetscFunctionBegin; 1837 if (str == SAME_NONZERO_PATTERN) { 1838 PetscScalar alpha = a; 1839 x = (Mat_SeqAIJ *)xx->A->data; 1840 y = (Mat_SeqAIJ *)yy->A->data; 1841 bnz = PetscBLASIntCast(x->nz); 1842 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1843 x = (Mat_SeqAIJ *)xx->B->data; 1844 y = (Mat_SeqAIJ *)yy->B->data; 1845 bnz = PetscBLASIntCast(x->nz); 1846 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1847 } else if (str == SUBSET_NONZERO_PATTERN) { 1848 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1849 1850 x = (Mat_SeqAIJ *)xx->B->data; 1851 y = (Mat_SeqAIJ *)yy->B->data; 1852 if (y->xtoy && y->XtoY != xx->B) { 1853 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1854 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1855 } 1856 if (!y->xtoy) { /* get xtoy */ 1857 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1858 y->XtoY = xx->B; 1859 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1860 } 1861 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1862 } else { 1863 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1864 } 1865 PetscFunctionReturn(0); 1866 } 1867 1868 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1869 1870 #undef __FUNCT__ 1871 #define __FUNCT__ "MatConjugate_MPIAIJ" 1872 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1873 { 1874 #if defined(PETSC_USE_COMPLEX) 1875 PetscErrorCode ierr; 1876 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1877 1878 PetscFunctionBegin; 1879 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1880 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1881 #else 1882 PetscFunctionBegin; 1883 #endif 1884 PetscFunctionReturn(0); 1885 } 1886 1887 #undef __FUNCT__ 1888 #define __FUNCT__ "MatRealPart_MPIAIJ" 1889 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1890 { 1891 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1892 PetscErrorCode ierr; 1893 1894 PetscFunctionBegin; 1895 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1896 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1897 PetscFunctionReturn(0); 1898 } 1899 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1902 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1903 { 1904 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1905 PetscErrorCode ierr; 1906 1907 PetscFunctionBegin; 1908 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1909 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1910 PetscFunctionReturn(0); 1911 } 1912 1913 #ifdef PETSC_HAVE_PBGL 1914 1915 #include <boost/parallel/mpi/bsp_process_group.hpp> 1916 #include <boost/graph/distributed/ilu_default_graph.hpp> 1917 #include <boost/graph/distributed/ilu_0_block.hpp> 1918 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1919 #include <boost/graph/distributed/petsc/interface.hpp> 1920 #include <boost/multi_array.hpp> 1921 #include <boost/parallel/distributed_property_map->hpp> 1922 1923 #undef __FUNCT__ 1924 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1925 /* 1926 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1927 */ 1928 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1929 { 1930 namespace petsc = boost::distributed::petsc; 1931 1932 namespace graph_dist = boost::graph::distributed; 1933 using boost::graph::distributed::ilu_default::process_group_type; 1934 using boost::graph::ilu_permuted; 1935 1936 PetscTruth row_identity, col_identity; 1937 PetscContainer c; 1938 PetscInt m, n, M, N; 1939 PetscErrorCode ierr; 1940 1941 PetscFunctionBegin; 1942 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1943 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1944 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1945 if (!row_identity || !col_identity) { 1946 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1947 } 1948 1949 process_group_type pg; 1950 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1951 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1952 lgraph_type& level_graph = *lgraph_p; 1953 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1954 1955 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1956 ilu_permuted(level_graph); 1957 1958 /* put together the new matrix */ 1959 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1960 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1961 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1962 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1963 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1964 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1965 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1966 1967 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1968 ierr = PetscContainerSetPointer(c, lgraph_p); 1969 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1970 PetscFunctionReturn(0); 1971 } 1972 1973 #undef __FUNCT__ 1974 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1975 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1976 { 1977 PetscFunctionBegin; 1978 PetscFunctionReturn(0); 1979 } 1980 1981 #undef __FUNCT__ 1982 #define __FUNCT__ "MatSolve_MPIAIJ" 1983 /* 1984 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1985 */ 1986 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1987 { 1988 namespace graph_dist = boost::graph::distributed; 1989 1990 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1991 lgraph_type* lgraph_p; 1992 PetscContainer c; 1993 PetscErrorCode ierr; 1994 1995 PetscFunctionBegin; 1996 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1997 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1998 ierr = VecCopy(b, x);CHKERRQ(ierr); 1999 2000 PetscScalar* array_x; 2001 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2002 PetscInt sx; 2003 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2004 2005 PetscScalar* array_b; 2006 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2007 PetscInt sb; 2008 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2009 2010 lgraph_type& level_graph = *lgraph_p; 2011 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2012 2013 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2014 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2015 ref_x(array_x, boost::extents[num_vertices(graph)]); 2016 2017 typedef boost::iterator_property_map<array_ref_type::iterator, 2018 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2019 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2020 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2021 2022 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2023 2024 PetscFunctionReturn(0); 2025 } 2026 #endif 2027 2028 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2029 PetscInt nzlocal,nsends,nrecvs; 2030 PetscMPIInt *send_rank,*recv_rank; 2031 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2032 PetscScalar *sbuf_a,**rbuf_a; 2033 PetscErrorCode (*MatDestroy)(Mat); 2034 } Mat_Redundant; 2035 2036 #undef __FUNCT__ 2037 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2038 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2039 { 2040 PetscErrorCode ierr; 2041 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2042 PetscInt i; 2043 2044 PetscFunctionBegin; 2045 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2046 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2047 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2048 for (i=0; i<redund->nrecvs; i++){ 2049 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2050 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2051 } 2052 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2053 ierr = PetscFree(redund);CHKERRQ(ierr); 2054 PetscFunctionReturn(0); 2055 } 2056 2057 #undef __FUNCT__ 2058 #define __FUNCT__ "MatDestroy_MatRedundant" 2059 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2060 { 2061 PetscErrorCode ierr; 2062 PetscContainer container; 2063 Mat_Redundant *redund=PETSC_NULL; 2064 2065 PetscFunctionBegin; 2066 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2067 if (container) { 2068 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2069 } else { 2070 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2071 } 2072 A->ops->destroy = redund->MatDestroy; 2073 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2074 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2075 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2076 PetscFunctionReturn(0); 2077 } 2078 2079 #undef __FUNCT__ 2080 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2081 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2082 { 2083 PetscMPIInt rank,size; 2084 MPI_Comm comm=((PetscObject)mat)->comm; 2085 PetscErrorCode ierr; 2086 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2087 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2088 PetscInt *rowrange=mat->rmap->range; 2089 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2090 Mat A=aij->A,B=aij->B,C=*matredundant; 2091 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2092 PetscScalar *sbuf_a; 2093 PetscInt nzlocal=a->nz+b->nz; 2094 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2095 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2096 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2097 MatScalar *aworkA,*aworkB; 2098 PetscScalar *vals; 2099 PetscMPIInt tag1,tag2,tag3,imdex; 2100 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2101 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2102 MPI_Status recv_status,*send_status; 2103 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2104 PetscInt **rbuf_j=PETSC_NULL; 2105 PetscScalar **rbuf_a=PETSC_NULL; 2106 Mat_Redundant *redund=PETSC_NULL; 2107 PetscContainer container; 2108 2109 PetscFunctionBegin; 2110 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2111 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2112 2113 if (reuse == MAT_REUSE_MATRIX) { 2114 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2115 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2116 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2117 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2118 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2119 if (container) { 2120 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2121 } else { 2122 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2123 } 2124 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2125 2126 nsends = redund->nsends; 2127 nrecvs = redund->nrecvs; 2128 send_rank = redund->send_rank; 2129 recv_rank = redund->recv_rank; 2130 sbuf_nz = redund->sbuf_nz; 2131 rbuf_nz = redund->rbuf_nz; 2132 sbuf_j = redund->sbuf_j; 2133 sbuf_a = redund->sbuf_a; 2134 rbuf_j = redund->rbuf_j; 2135 rbuf_a = redund->rbuf_a; 2136 } 2137 2138 if (reuse == MAT_INITIAL_MATRIX){ 2139 PetscMPIInt subrank,subsize; 2140 PetscInt nleftover,np_subcomm; 2141 /* get the destination processors' id send_rank, nsends and nrecvs */ 2142 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2143 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2144 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2145 np_subcomm = size/nsubcomm; 2146 nleftover = size - nsubcomm*np_subcomm; 2147 nsends = 0; nrecvs = 0; 2148 for (i=0; i<size; i++){ /* i=rank*/ 2149 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2150 send_rank[nsends] = i; nsends++; 2151 recv_rank[nrecvs++] = i; 2152 } 2153 } 2154 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2155 i = size-nleftover-1; 2156 j = 0; 2157 while (j < nsubcomm - nleftover){ 2158 send_rank[nsends++] = i; 2159 i--; j++; 2160 } 2161 } 2162 2163 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2164 for (i=0; i<nleftover; i++){ 2165 recv_rank[nrecvs++] = size-nleftover+i; 2166 } 2167 } 2168 2169 /* allocate sbuf_j, sbuf_a */ 2170 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2171 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2172 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2173 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2174 2175 /* copy mat's local entries into the buffers */ 2176 if (reuse == MAT_INITIAL_MATRIX){ 2177 rownz_max = 0; 2178 rptr = sbuf_j; 2179 cols = sbuf_j + rend-rstart + 1; 2180 vals = sbuf_a; 2181 rptr[0] = 0; 2182 for (i=0; i<rend-rstart; i++){ 2183 row = i + rstart; 2184 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2185 ncols = nzA + nzB; 2186 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2187 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2188 /* load the column indices for this row into cols */ 2189 lwrite = 0; 2190 for (l=0; l<nzB; l++) { 2191 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2192 vals[lwrite] = aworkB[l]; 2193 cols[lwrite++] = ctmp; 2194 } 2195 } 2196 for (l=0; l<nzA; l++){ 2197 vals[lwrite] = aworkA[l]; 2198 cols[lwrite++] = cstart + cworkA[l]; 2199 } 2200 for (l=0; l<nzB; l++) { 2201 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2202 vals[lwrite] = aworkB[l]; 2203 cols[lwrite++] = ctmp; 2204 } 2205 } 2206 vals += ncols; 2207 cols += ncols; 2208 rptr[i+1] = rptr[i] + ncols; 2209 if (rownz_max < ncols) rownz_max = ncols; 2210 } 2211 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2212 } else { /* only copy matrix values into sbuf_a */ 2213 rptr = sbuf_j; 2214 vals = sbuf_a; 2215 rptr[0] = 0; 2216 for (i=0; i<rend-rstart; i++){ 2217 row = i + rstart; 2218 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2219 ncols = nzA + nzB; 2220 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2221 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2222 lwrite = 0; 2223 for (l=0; l<nzB; l++) { 2224 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2225 } 2226 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2227 for (l=0; l<nzB; l++) { 2228 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2229 } 2230 vals += ncols; 2231 rptr[i+1] = rptr[i] + ncols; 2232 } 2233 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2234 2235 /* send nzlocal to others, and recv other's nzlocal */ 2236 /*--------------------------------------------------*/ 2237 if (reuse == MAT_INITIAL_MATRIX){ 2238 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2239 s_waits2 = s_waits3 + nsends; 2240 s_waits1 = s_waits2 + nsends; 2241 r_waits1 = s_waits1 + nsends; 2242 r_waits2 = r_waits1 + nrecvs; 2243 r_waits3 = r_waits2 + nrecvs; 2244 } else { 2245 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2246 r_waits3 = s_waits3 + nsends; 2247 } 2248 2249 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2250 if (reuse == MAT_INITIAL_MATRIX){ 2251 /* get new tags to keep the communication clean */ 2252 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2253 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2254 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2255 2256 /* post receives of other's nzlocal */ 2257 for (i=0; i<nrecvs; i++){ 2258 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2259 } 2260 /* send nzlocal to others */ 2261 for (i=0; i<nsends; i++){ 2262 sbuf_nz[i] = nzlocal; 2263 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2264 } 2265 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2266 count = nrecvs; 2267 while (count) { 2268 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2269 recv_rank[imdex] = recv_status.MPI_SOURCE; 2270 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2271 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2272 2273 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2274 rbuf_nz[imdex] += i + 2; 2275 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2276 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2277 count--; 2278 } 2279 /* wait on sends of nzlocal */ 2280 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2281 /* send mat->i,j to others, and recv from other's */ 2282 /*------------------------------------------------*/ 2283 for (i=0; i<nsends; i++){ 2284 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2285 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2286 } 2287 /* wait on receives of mat->i,j */ 2288 /*------------------------------*/ 2289 count = nrecvs; 2290 while (count) { 2291 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2292 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2293 count--; 2294 } 2295 /* wait on sends of mat->i,j */ 2296 /*---------------------------*/ 2297 if (nsends) { 2298 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2299 } 2300 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2301 2302 /* post receives, send and receive mat->a */ 2303 /*----------------------------------------*/ 2304 for (imdex=0; imdex<nrecvs; imdex++) { 2305 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2306 } 2307 for (i=0; i<nsends; i++){ 2308 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2309 } 2310 count = nrecvs; 2311 while (count) { 2312 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2313 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2314 count--; 2315 } 2316 if (nsends) { 2317 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2318 } 2319 2320 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2321 2322 /* create redundant matrix */ 2323 /*-------------------------*/ 2324 if (reuse == MAT_INITIAL_MATRIX){ 2325 /* compute rownz_max for preallocation */ 2326 for (imdex=0; imdex<nrecvs; imdex++){ 2327 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2328 rptr = rbuf_j[imdex]; 2329 for (i=0; i<j; i++){ 2330 ncols = rptr[i+1] - rptr[i]; 2331 if (rownz_max < ncols) rownz_max = ncols; 2332 } 2333 } 2334 2335 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2336 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2337 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2338 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2339 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2340 } else { 2341 C = *matredundant; 2342 } 2343 2344 /* insert local matrix entries */ 2345 rptr = sbuf_j; 2346 cols = sbuf_j + rend-rstart + 1; 2347 vals = sbuf_a; 2348 for (i=0; i<rend-rstart; i++){ 2349 row = i + rstart; 2350 ncols = rptr[i+1] - rptr[i]; 2351 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2352 vals += ncols; 2353 cols += ncols; 2354 } 2355 /* insert received matrix entries */ 2356 for (imdex=0; imdex<nrecvs; imdex++){ 2357 rstart = rowrange[recv_rank[imdex]]; 2358 rend = rowrange[recv_rank[imdex]+1]; 2359 rptr = rbuf_j[imdex]; 2360 cols = rbuf_j[imdex] + rend-rstart + 1; 2361 vals = rbuf_a[imdex]; 2362 for (i=0; i<rend-rstart; i++){ 2363 row = i + rstart; 2364 ncols = rptr[i+1] - rptr[i]; 2365 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2366 vals += ncols; 2367 cols += ncols; 2368 } 2369 } 2370 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2371 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2372 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2373 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2374 if (reuse == MAT_INITIAL_MATRIX){ 2375 PetscContainer container; 2376 *matredundant = C; 2377 /* create a supporting struct and attach it to C for reuse */ 2378 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2379 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2380 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2381 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2382 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2383 2384 redund->nzlocal = nzlocal; 2385 redund->nsends = nsends; 2386 redund->nrecvs = nrecvs; 2387 redund->send_rank = send_rank; 2388 redund->recv_rank = recv_rank; 2389 redund->sbuf_nz = sbuf_nz; 2390 redund->rbuf_nz = rbuf_nz; 2391 redund->sbuf_j = sbuf_j; 2392 redund->sbuf_a = sbuf_a; 2393 redund->rbuf_j = rbuf_j; 2394 redund->rbuf_a = rbuf_a; 2395 2396 redund->MatDestroy = C->ops->destroy; 2397 C->ops->destroy = MatDestroy_MatRedundant; 2398 } 2399 PetscFunctionReturn(0); 2400 } 2401 2402 #undef __FUNCT__ 2403 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2404 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2405 { 2406 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2407 PetscErrorCode ierr; 2408 PetscInt i,*idxb = 0; 2409 PetscScalar *va,*vb; 2410 Vec vtmp; 2411 2412 PetscFunctionBegin; 2413 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2414 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2415 if (idx) { 2416 for (i=0; i<A->rmap->n; i++) { 2417 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2418 } 2419 } 2420 2421 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2422 if (idx) { 2423 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2424 } 2425 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2426 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2427 2428 for (i=0; i<A->rmap->n; i++){ 2429 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2430 va[i] = vb[i]; 2431 if (idx) idx[i] = a->garray[idxb[i]]; 2432 } 2433 } 2434 2435 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2436 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2437 if (idxb) { 2438 ierr = PetscFree(idxb);CHKERRQ(ierr); 2439 } 2440 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2441 PetscFunctionReturn(0); 2442 } 2443 2444 #undef __FUNCT__ 2445 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2446 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2447 { 2448 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2449 PetscErrorCode ierr; 2450 PetscInt i,*idxb = 0; 2451 PetscScalar *va,*vb; 2452 Vec vtmp; 2453 2454 PetscFunctionBegin; 2455 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2456 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2457 if (idx) { 2458 for (i=0; i<A->cmap->n; i++) { 2459 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2460 } 2461 } 2462 2463 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2464 if (idx) { 2465 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2466 } 2467 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2468 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2469 2470 for (i=0; i<A->rmap->n; i++){ 2471 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2472 va[i] = vb[i]; 2473 if (idx) idx[i] = a->garray[idxb[i]]; 2474 } 2475 } 2476 2477 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2478 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2479 if (idxb) { 2480 ierr = PetscFree(idxb);CHKERRQ(ierr); 2481 } 2482 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2483 PetscFunctionReturn(0); 2484 } 2485 2486 #undef __FUNCT__ 2487 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2488 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2489 { 2490 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2491 PetscInt n = A->rmap->n; 2492 PetscInt cstart = A->cmap->rstart; 2493 PetscInt *cmap = mat->garray; 2494 PetscInt *diagIdx, *offdiagIdx; 2495 Vec diagV, offdiagV; 2496 PetscScalar *a, *diagA, *offdiagA; 2497 PetscInt r; 2498 PetscErrorCode ierr; 2499 2500 PetscFunctionBegin; 2501 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2502 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2503 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2504 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2505 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2506 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2507 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2508 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2509 for(r = 0; r < n; ++r) { 2510 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2511 a[r] = diagA[r]; 2512 idx[r] = cstart + diagIdx[r]; 2513 } else { 2514 a[r] = offdiagA[r]; 2515 idx[r] = cmap[offdiagIdx[r]]; 2516 } 2517 } 2518 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2519 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2520 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2521 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2522 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2523 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2524 PetscFunctionReturn(0); 2525 } 2526 2527 #undef __FUNCT__ 2528 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2529 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2530 { 2531 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2532 PetscInt n = A->rmap->n; 2533 PetscInt cstart = A->cmap->rstart; 2534 PetscInt *cmap = mat->garray; 2535 PetscInt *diagIdx, *offdiagIdx; 2536 Vec diagV, offdiagV; 2537 PetscScalar *a, *diagA, *offdiagA; 2538 PetscInt r; 2539 PetscErrorCode ierr; 2540 2541 PetscFunctionBegin; 2542 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2543 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2544 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2545 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2546 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2547 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2548 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2549 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2550 for(r = 0; r < n; ++r) { 2551 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2552 a[r] = diagA[r]; 2553 idx[r] = cstart + diagIdx[r]; 2554 } else { 2555 a[r] = offdiagA[r]; 2556 idx[r] = cmap[offdiagIdx[r]]; 2557 } 2558 } 2559 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2560 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2561 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2562 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2563 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2564 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2565 PetscFunctionReturn(0); 2566 } 2567 2568 #undef __FUNCT__ 2569 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2570 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2571 { 2572 PetscErrorCode ierr; 2573 Mat *dummy; 2574 2575 PetscFunctionBegin; 2576 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2577 *newmat = *dummy; 2578 ierr = PetscFree(dummy);CHKERRQ(ierr); 2579 PetscFunctionReturn(0); 2580 } 2581 2582 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2583 /* -------------------------------------------------------------------*/ 2584 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2585 MatGetRow_MPIAIJ, 2586 MatRestoreRow_MPIAIJ, 2587 MatMult_MPIAIJ, 2588 /* 4*/ MatMultAdd_MPIAIJ, 2589 MatMultTranspose_MPIAIJ, 2590 MatMultTransposeAdd_MPIAIJ, 2591 #ifdef PETSC_HAVE_PBGL 2592 MatSolve_MPIAIJ, 2593 #else 2594 0, 2595 #endif 2596 0, 2597 0, 2598 /*10*/ 0, 2599 0, 2600 0, 2601 MatSOR_MPIAIJ, 2602 MatTranspose_MPIAIJ, 2603 /*15*/ MatGetInfo_MPIAIJ, 2604 MatEqual_MPIAIJ, 2605 MatGetDiagonal_MPIAIJ, 2606 MatDiagonalScale_MPIAIJ, 2607 MatNorm_MPIAIJ, 2608 /*20*/ MatAssemblyBegin_MPIAIJ, 2609 MatAssemblyEnd_MPIAIJ, 2610 MatSetOption_MPIAIJ, 2611 MatZeroEntries_MPIAIJ, 2612 /*24*/ MatZeroRows_MPIAIJ, 2613 0, 2614 #ifdef PETSC_HAVE_PBGL 2615 0, 2616 #else 2617 0, 2618 #endif 2619 0, 2620 0, 2621 /*29*/ MatSetUpPreallocation_MPIAIJ, 2622 #ifdef PETSC_HAVE_PBGL 2623 0, 2624 #else 2625 0, 2626 #endif 2627 0, 2628 0, 2629 0, 2630 /*34*/ MatDuplicate_MPIAIJ, 2631 0, 2632 0, 2633 0, 2634 0, 2635 /*39*/ MatAXPY_MPIAIJ, 2636 MatGetSubMatrices_MPIAIJ, 2637 MatIncreaseOverlap_MPIAIJ, 2638 MatGetValues_MPIAIJ, 2639 MatCopy_MPIAIJ, 2640 /*44*/ MatGetRowMax_MPIAIJ, 2641 MatScale_MPIAIJ, 2642 0, 2643 0, 2644 0, 2645 /*49*/ MatSetBlockSize_MPIAIJ, 2646 0, 2647 0, 2648 0, 2649 0, 2650 /*54*/ MatFDColoringCreate_MPIAIJ, 2651 0, 2652 MatSetUnfactored_MPIAIJ, 2653 MatPermute_MPIAIJ, 2654 0, 2655 /*59*/ MatGetSubMatrix_MPIAIJ, 2656 MatDestroy_MPIAIJ, 2657 MatView_MPIAIJ, 2658 0, 2659 0, 2660 /*64*/ 0, 2661 0, 2662 0, 2663 0, 2664 0, 2665 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2666 MatGetRowMinAbs_MPIAIJ, 2667 0, 2668 MatSetColoring_MPIAIJ, 2669 #if defined(PETSC_HAVE_ADIC) 2670 MatSetValuesAdic_MPIAIJ, 2671 #else 2672 0, 2673 #endif 2674 MatSetValuesAdifor_MPIAIJ, 2675 /*75*/ MatFDColoringApply_AIJ, 2676 0, 2677 0, 2678 0, 2679 0, 2680 /*80*/ 0, 2681 0, 2682 0, 2683 /*83*/ MatLoad_MPIAIJ, 2684 0, 2685 0, 2686 0, 2687 0, 2688 0, 2689 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2690 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2691 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2692 MatPtAP_Basic, 2693 MatPtAPSymbolic_MPIAIJ, 2694 /*94*/ MatPtAPNumeric_MPIAIJ, 2695 0, 2696 0, 2697 0, 2698 0, 2699 /*99*/ 0, 2700 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2701 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2702 MatConjugate_MPIAIJ, 2703 0, 2704 /*104*/MatSetValuesRow_MPIAIJ, 2705 MatRealPart_MPIAIJ, 2706 MatImaginaryPart_MPIAIJ, 2707 0, 2708 0, 2709 /*109*/0, 2710 MatGetRedundantMatrix_MPIAIJ, 2711 MatGetRowMin_MPIAIJ, 2712 0, 2713 0, 2714 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2715 0, 2716 0, 2717 0, 2718 0, 2719 /*119*/0, 2720 0, 2721 0, 2722 0, 2723 MatGetMultiProcBlock_MPIAIJ 2724 }; 2725 2726 /* ----------------------------------------------------------------------------------------*/ 2727 2728 EXTERN_C_BEGIN 2729 #undef __FUNCT__ 2730 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2731 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2732 { 2733 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2734 PetscErrorCode ierr; 2735 2736 PetscFunctionBegin; 2737 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2738 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2739 PetscFunctionReturn(0); 2740 } 2741 EXTERN_C_END 2742 2743 EXTERN_C_BEGIN 2744 #undef __FUNCT__ 2745 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2746 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2747 { 2748 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2749 PetscErrorCode ierr; 2750 2751 PetscFunctionBegin; 2752 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2753 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2754 PetscFunctionReturn(0); 2755 } 2756 EXTERN_C_END 2757 2758 EXTERN_C_BEGIN 2759 #undef __FUNCT__ 2760 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2761 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2762 { 2763 Mat_MPIAIJ *b; 2764 PetscErrorCode ierr; 2765 PetscInt i; 2766 2767 PetscFunctionBegin; 2768 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2769 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2770 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2771 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2772 2773 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2774 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2775 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2776 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2777 if (d_nnz) { 2778 for (i=0; i<B->rmap->n; i++) { 2779 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2780 } 2781 } 2782 if (o_nnz) { 2783 for (i=0; i<B->rmap->n; i++) { 2784 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2785 } 2786 } 2787 b = (Mat_MPIAIJ*)B->data; 2788 2789 if (!B->preallocated) { 2790 /* Explicitly create 2 MATSEQAIJ matrices. */ 2791 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2792 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2793 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2794 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2795 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2796 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2797 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2798 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2799 } 2800 2801 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2802 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2803 B->preallocated = PETSC_TRUE; 2804 PetscFunctionReturn(0); 2805 } 2806 EXTERN_C_END 2807 2808 #undef __FUNCT__ 2809 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2810 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2811 { 2812 Mat mat; 2813 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2814 PetscErrorCode ierr; 2815 2816 PetscFunctionBegin; 2817 *newmat = 0; 2818 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2819 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2820 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2821 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2822 a = (Mat_MPIAIJ*)mat->data; 2823 2824 mat->factortype = matin->factortype; 2825 mat->rmap->bs = matin->rmap->bs; 2826 mat->assembled = PETSC_TRUE; 2827 mat->insertmode = NOT_SET_VALUES; 2828 mat->preallocated = PETSC_TRUE; 2829 2830 a->size = oldmat->size; 2831 a->rank = oldmat->rank; 2832 a->donotstash = oldmat->donotstash; 2833 a->roworiented = oldmat->roworiented; 2834 a->rowindices = 0; 2835 a->rowvalues = 0; 2836 a->getrowactive = PETSC_FALSE; 2837 2838 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2839 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2840 2841 if (oldmat->colmap) { 2842 #if defined (PETSC_USE_CTABLE) 2843 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2844 #else 2845 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2846 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2847 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2848 #endif 2849 } else a->colmap = 0; 2850 if (oldmat->garray) { 2851 PetscInt len; 2852 len = oldmat->B->cmap->n; 2853 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2854 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2855 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2856 } else a->garray = 0; 2857 2858 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2859 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2860 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2861 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2862 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2863 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2864 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2865 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2866 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2867 *newmat = mat; 2868 PetscFunctionReturn(0); 2869 } 2870 2871 /* 2872 Allows sending/receiving larger messages then 2 gigabytes in a single call 2873 */ 2874 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm) 2875 { 2876 int ierr; 2877 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 2878 PetscInt i,numchunks; 2879 PetscMPIInt icnt; 2880 2881 numchunks = cnt/CHUNKSIZE + 1; 2882 for (i=0; i<numchunks; i++) { 2883 icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 2884 ierr = MPI_Send(mess,icnt,type,to,tag,comm); 2885 if (type == MPIU_INT) { 2886 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 2887 } else if (type == MPIU_SCALAR) { 2888 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 2889 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 2890 } 2891 return 0; 2892 } 2893 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm) 2894 { 2895 int ierr; 2896 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 2897 MPI_Status status; 2898 PetscInt i,numchunks; 2899 PetscMPIInt icnt; 2900 2901 numchunks = cnt/CHUNKSIZE + 1; 2902 for (i=0; i<numchunks; i++) { 2903 icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 2904 ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status); 2905 if (type == MPIU_INT) { 2906 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 2907 } else if (type == MPIU_SCALAR) { 2908 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 2909 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 2910 } 2911 return 0; 2912 } 2913 2914 #undef __FUNCT__ 2915 #define __FUNCT__ "MatLoad_MPIAIJ" 2916 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2917 { 2918 PetscScalar *vals,*svals; 2919 MPI_Comm comm = ((PetscObject)viewer)->comm; 2920 PetscErrorCode ierr; 2921 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2922 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 2923 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2924 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2925 PetscInt cend,cstart,n,*rowners,sizesset=1; 2926 int fd; 2927 2928 PetscFunctionBegin; 2929 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2930 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2931 if (!rank) { 2932 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2933 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2934 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2935 } 2936 2937 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 2938 2939 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2940 M = header[1]; N = header[2]; 2941 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2942 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 2943 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 2944 2945 /* If global sizes are set, check if they are consistent with that given in the file */ 2946 if (sizesset) { 2947 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 2948 } 2949 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2950 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2951 2952 /* determine ownership of all rows */ 2953 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 2954 else m = newMat->rmap->n; /* Set by user */ 2955 2956 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2957 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2958 2959 /* First process needs enough room for process with most rows */ 2960 if (!rank) { 2961 mmax = rowners[1]; 2962 for (i=2; i<size; i++) { 2963 mmax = PetscMax(mmax,rowners[i]); 2964 } 2965 } else mmax = m; 2966 2967 rowners[0] = 0; 2968 for (i=2; i<=size; i++) { 2969 rowners[i] += rowners[i-1]; 2970 } 2971 rstart = rowners[rank]; 2972 rend = rowners[rank+1]; 2973 2974 /* distribute row lengths to all processors */ 2975 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2976 if (!rank) { 2977 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2978 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2979 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2980 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2981 for (j=0; j<m; j++) { 2982 procsnz[0] += ourlens[j]; 2983 } 2984 for (i=1; i<size; i++) { 2985 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2986 /* calculate the number of nonzeros on each processor */ 2987 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2988 procsnz[i] += rowlengths[j]; 2989 } 2990 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2991 } 2992 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2993 } else { 2994 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2995 } 2996 2997 if (!rank) { 2998 /* determine max buffer needed and allocate it */ 2999 maxnz = 0; 3000 for (i=0; i<size; i++) { 3001 maxnz = PetscMax(maxnz,procsnz[i]); 3002 } 3003 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3004 3005 /* read in my part of the matrix column indices */ 3006 nz = procsnz[0]; 3007 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3008 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3009 3010 /* read in every one elses and ship off */ 3011 for (i=1; i<size; i++) { 3012 nz = procsnz[i]; 3013 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3014 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3015 } 3016 ierr = PetscFree(cols);CHKERRQ(ierr); 3017 } else { 3018 /* determine buffer space needed for message */ 3019 nz = 0; 3020 for (i=0; i<m; i++) { 3021 nz += ourlens[i]; 3022 } 3023 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3024 3025 /* receive message of column indices*/ 3026 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3027 } 3028 3029 /* determine column ownership if matrix is not square */ 3030 if (N != M) { 3031 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3032 else n = newMat->cmap->n; 3033 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3034 cstart = cend - n; 3035 } else { 3036 cstart = rstart; 3037 cend = rend; 3038 n = cend - cstart; 3039 } 3040 3041 /* loop over local rows, determining number of off diagonal entries */ 3042 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3043 jj = 0; 3044 for (i=0; i<m; i++) { 3045 for (j=0; j<ourlens[i]; j++) { 3046 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3047 jj++; 3048 } 3049 } 3050 3051 for (i=0; i<m; i++) { 3052 ourlens[i] -= offlens[i]; 3053 } 3054 if (!sizesset) { 3055 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3056 } 3057 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3058 3059 for (i=0; i<m; i++) { 3060 ourlens[i] += offlens[i]; 3061 } 3062 3063 if (!rank) { 3064 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3065 3066 /* read in my part of the matrix numerical values */ 3067 nz = procsnz[0]; 3068 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3069 3070 /* insert into matrix */ 3071 jj = rstart; 3072 smycols = mycols; 3073 svals = vals; 3074 for (i=0; i<m; i++) { 3075 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3076 smycols += ourlens[i]; 3077 svals += ourlens[i]; 3078 jj++; 3079 } 3080 3081 /* read in other processors and ship out */ 3082 for (i=1; i<size; i++) { 3083 nz = procsnz[i]; 3084 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3085 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3086 } 3087 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3088 } else { 3089 /* receive numeric values */ 3090 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3091 3092 /* receive message of values*/ 3093 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3094 3095 /* insert into matrix */ 3096 jj = rstart; 3097 smycols = mycols; 3098 svals = vals; 3099 for (i=0; i<m; i++) { 3100 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3101 smycols += ourlens[i]; 3102 svals += ourlens[i]; 3103 jj++; 3104 } 3105 } 3106 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3107 ierr = PetscFree(vals);CHKERRQ(ierr); 3108 ierr = PetscFree(mycols);CHKERRQ(ierr); 3109 ierr = PetscFree(rowners);CHKERRQ(ierr); 3110 3111 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3112 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3113 PetscFunctionReturn(0); 3114 } 3115 3116 #undef __FUNCT__ 3117 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3118 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3119 { 3120 PetscErrorCode ierr; 3121 IS iscol_local; 3122 PetscInt csize; 3123 3124 PetscFunctionBegin; 3125 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3126 if (call == MAT_REUSE_MATRIX) { 3127 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3128 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3129 } else { 3130 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3131 } 3132 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3133 if (call == MAT_INITIAL_MATRIX) { 3134 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3135 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3136 } 3137 PetscFunctionReturn(0); 3138 } 3139 3140 #undef __FUNCT__ 3141 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3142 /* 3143 Not great since it makes two copies of the submatrix, first an SeqAIJ 3144 in local and then by concatenating the local matrices the end result. 3145 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3146 3147 Note: This requires a sequential iscol with all indices. 3148 */ 3149 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3150 { 3151 PetscErrorCode ierr; 3152 PetscMPIInt rank,size; 3153 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3154 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3155 Mat *local,M,Mreuse; 3156 MatScalar *vwork,*aa; 3157 MPI_Comm comm = ((PetscObject)mat)->comm; 3158 Mat_SeqAIJ *aij; 3159 3160 3161 PetscFunctionBegin; 3162 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3163 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3164 3165 if (call == MAT_REUSE_MATRIX) { 3166 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3167 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3168 local = &Mreuse; 3169 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3170 } else { 3171 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3172 Mreuse = *local; 3173 ierr = PetscFree(local);CHKERRQ(ierr); 3174 } 3175 3176 /* 3177 m - number of local rows 3178 n - number of columns (same on all processors) 3179 rstart - first row in new global matrix generated 3180 */ 3181 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3182 if (call == MAT_INITIAL_MATRIX) { 3183 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3184 ii = aij->i; 3185 jj = aij->j; 3186 3187 /* 3188 Determine the number of non-zeros in the diagonal and off-diagonal 3189 portions of the matrix in order to do correct preallocation 3190 */ 3191 3192 /* first get start and end of "diagonal" columns */ 3193 if (csize == PETSC_DECIDE) { 3194 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3195 if (mglobal == n) { /* square matrix */ 3196 nlocal = m; 3197 } else { 3198 nlocal = n/size + ((n % size) > rank); 3199 } 3200 } else { 3201 nlocal = csize; 3202 } 3203 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3204 rstart = rend - nlocal; 3205 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3206 3207 /* next, compute all the lengths */ 3208 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3209 olens = dlens + m; 3210 for (i=0; i<m; i++) { 3211 jend = ii[i+1] - ii[i]; 3212 olen = 0; 3213 dlen = 0; 3214 for (j=0; j<jend; j++) { 3215 if (*jj < rstart || *jj >= rend) olen++; 3216 else dlen++; 3217 jj++; 3218 } 3219 olens[i] = olen; 3220 dlens[i] = dlen; 3221 } 3222 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3223 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3224 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3225 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3226 ierr = PetscFree(dlens);CHKERRQ(ierr); 3227 } else { 3228 PetscInt ml,nl; 3229 3230 M = *newmat; 3231 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3232 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3233 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3234 /* 3235 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3236 rather than the slower MatSetValues(). 3237 */ 3238 M->was_assembled = PETSC_TRUE; 3239 M->assembled = PETSC_FALSE; 3240 } 3241 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3242 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3243 ii = aij->i; 3244 jj = aij->j; 3245 aa = aij->a; 3246 for (i=0; i<m; i++) { 3247 row = rstart + i; 3248 nz = ii[i+1] - ii[i]; 3249 cwork = jj; jj += nz; 3250 vwork = aa; aa += nz; 3251 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3252 } 3253 3254 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3255 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3256 *newmat = M; 3257 3258 /* save submatrix used in processor for next request */ 3259 if (call == MAT_INITIAL_MATRIX) { 3260 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3261 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3262 } 3263 3264 PetscFunctionReturn(0); 3265 } 3266 3267 EXTERN_C_BEGIN 3268 #undef __FUNCT__ 3269 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3270 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3271 { 3272 PetscInt m,cstart, cend,j,nnz,i,d; 3273 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3274 const PetscInt *JJ; 3275 PetscScalar *values; 3276 PetscErrorCode ierr; 3277 3278 PetscFunctionBegin; 3279 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3280 3281 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3282 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3283 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3284 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3285 m = B->rmap->n; 3286 cstart = B->cmap->rstart; 3287 cend = B->cmap->rend; 3288 rstart = B->rmap->rstart; 3289 3290 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3291 3292 #if defined(PETSC_USE_DEBUGGING) 3293 for (i=0; i<m; i++) { 3294 nnz = Ii[i+1]- Ii[i]; 3295 JJ = J + Ii[i]; 3296 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3297 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3298 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3299 } 3300 #endif 3301 3302 for (i=0; i<m; i++) { 3303 nnz = Ii[i+1]- Ii[i]; 3304 JJ = J + Ii[i]; 3305 nnz_max = PetscMax(nnz_max,nnz); 3306 d = 0; 3307 for (j=0; j<nnz; j++) { 3308 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3309 } 3310 d_nnz[i] = d; 3311 o_nnz[i] = nnz - d; 3312 } 3313 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3314 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3315 3316 if (v) values = (PetscScalar*)v; 3317 else { 3318 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3319 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3320 } 3321 3322 for (i=0; i<m; i++) { 3323 ii = i + rstart; 3324 nnz = Ii[i+1]- Ii[i]; 3325 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3326 } 3327 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3328 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3329 3330 if (!v) { 3331 ierr = PetscFree(values);CHKERRQ(ierr); 3332 } 3333 PetscFunctionReturn(0); 3334 } 3335 EXTERN_C_END 3336 3337 #undef __FUNCT__ 3338 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3339 /*@ 3340 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3341 (the default parallel PETSc format). 3342 3343 Collective on MPI_Comm 3344 3345 Input Parameters: 3346 + B - the matrix 3347 . i - the indices into j for the start of each local row (starts with zero) 3348 . j - the column indices for each local row (starts with zero) 3349 - v - optional values in the matrix 3350 3351 Level: developer 3352 3353 Notes: 3354 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3355 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3356 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3357 3358 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3359 3360 The format which is used for the sparse matrix input, is equivalent to a 3361 row-major ordering.. i.e for the following matrix, the input data expected is 3362 as shown: 3363 3364 1 0 0 3365 2 0 3 P0 3366 ------- 3367 4 5 6 P1 3368 3369 Process0 [P0]: rows_owned=[0,1] 3370 i = {0,1,3} [size = nrow+1 = 2+1] 3371 j = {0,0,2} [size = nz = 6] 3372 v = {1,2,3} [size = nz = 6] 3373 3374 Process1 [P1]: rows_owned=[2] 3375 i = {0,3} [size = nrow+1 = 1+1] 3376 j = {0,1,2} [size = nz = 6] 3377 v = {4,5,6} [size = nz = 6] 3378 3379 .keywords: matrix, aij, compressed row, sparse, parallel 3380 3381 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3382 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3383 @*/ 3384 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3385 { 3386 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3387 3388 PetscFunctionBegin; 3389 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3390 if (f) { 3391 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3392 } 3393 PetscFunctionReturn(0); 3394 } 3395 3396 #undef __FUNCT__ 3397 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3398 /*@C 3399 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3400 (the default parallel PETSc format). For good matrix assembly performance 3401 the user should preallocate the matrix storage by setting the parameters 3402 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3403 performance can be increased by more than a factor of 50. 3404 3405 Collective on MPI_Comm 3406 3407 Input Parameters: 3408 + A - the matrix 3409 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3410 (same value is used for all local rows) 3411 . d_nnz - array containing the number of nonzeros in the various rows of the 3412 DIAGONAL portion of the local submatrix (possibly different for each row) 3413 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3414 The size of this array is equal to the number of local rows, i.e 'm'. 3415 You must leave room for the diagonal entry even if it is zero. 3416 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3417 submatrix (same value is used for all local rows). 3418 - o_nnz - array containing the number of nonzeros in the various rows of the 3419 OFF-DIAGONAL portion of the local submatrix (possibly different for 3420 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3421 structure. The size of this array is equal to the number 3422 of local rows, i.e 'm'. 3423 3424 If the *_nnz parameter is given then the *_nz parameter is ignored 3425 3426 The AIJ format (also called the Yale sparse matrix format or 3427 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3428 storage. The stored row and column indices begin with zero. 3429 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3430 3431 The parallel matrix is partitioned such that the first m0 rows belong to 3432 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3433 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3434 3435 The DIAGONAL portion of the local submatrix of a processor can be defined 3436 as the submatrix which is obtained by extraction the part corresponding to 3437 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3438 first row that belongs to the processor, r2 is the last row belonging to 3439 the this processor, and c1-c2 is range of indices of the local part of a 3440 vector suitable for applying the matrix to. This is an mxn matrix. In the 3441 common case of a square matrix, the row and column ranges are the same and 3442 the DIAGONAL part is also square. The remaining portion of the local 3443 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3444 3445 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3446 3447 You can call MatGetInfo() to get information on how effective the preallocation was; 3448 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3449 You can also run with the option -info and look for messages with the string 3450 malloc in them to see if additional memory allocation was needed. 3451 3452 Example usage: 3453 3454 Consider the following 8x8 matrix with 34 non-zero values, that is 3455 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3456 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3457 as follows: 3458 3459 .vb 3460 1 2 0 | 0 3 0 | 0 4 3461 Proc0 0 5 6 | 7 0 0 | 8 0 3462 9 0 10 | 11 0 0 | 12 0 3463 ------------------------------------- 3464 13 0 14 | 15 16 17 | 0 0 3465 Proc1 0 18 0 | 19 20 21 | 0 0 3466 0 0 0 | 22 23 0 | 24 0 3467 ------------------------------------- 3468 Proc2 25 26 27 | 0 0 28 | 29 0 3469 30 0 0 | 31 32 33 | 0 34 3470 .ve 3471 3472 This can be represented as a collection of submatrices as: 3473 3474 .vb 3475 A B C 3476 D E F 3477 G H I 3478 .ve 3479 3480 Where the submatrices A,B,C are owned by proc0, D,E,F are 3481 owned by proc1, G,H,I are owned by proc2. 3482 3483 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3484 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3485 The 'M','N' parameters are 8,8, and have the same values on all procs. 3486 3487 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3488 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3489 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3490 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3491 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3492 matrix, ans [DF] as another SeqAIJ matrix. 3493 3494 When d_nz, o_nz parameters are specified, d_nz storage elements are 3495 allocated for every row of the local diagonal submatrix, and o_nz 3496 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3497 One way to choose d_nz and o_nz is to use the max nonzerors per local 3498 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3499 In this case, the values of d_nz,o_nz are: 3500 .vb 3501 proc0 : dnz = 2, o_nz = 2 3502 proc1 : dnz = 3, o_nz = 2 3503 proc2 : dnz = 1, o_nz = 4 3504 .ve 3505 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3506 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3507 for proc3. i.e we are using 12+15+10=37 storage locations to store 3508 34 values. 3509 3510 When d_nnz, o_nnz parameters are specified, the storage is specified 3511 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3512 In the above case the values for d_nnz,o_nnz are: 3513 .vb 3514 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3515 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3516 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3517 .ve 3518 Here the space allocated is sum of all the above values i.e 34, and 3519 hence pre-allocation is perfect. 3520 3521 Level: intermediate 3522 3523 .keywords: matrix, aij, compressed row, sparse, parallel 3524 3525 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3526 MPIAIJ, MatGetInfo() 3527 @*/ 3528 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3529 { 3530 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3531 3532 PetscFunctionBegin; 3533 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3534 if (f) { 3535 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3536 } 3537 PetscFunctionReturn(0); 3538 } 3539 3540 #undef __FUNCT__ 3541 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3542 /*@ 3543 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3544 CSR format the local rows. 3545 3546 Collective on MPI_Comm 3547 3548 Input Parameters: 3549 + comm - MPI communicator 3550 . m - number of local rows (Cannot be PETSC_DECIDE) 3551 . n - This value should be the same as the local size used in creating the 3552 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3553 calculated if N is given) For square matrices n is almost always m. 3554 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3555 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3556 . i - row indices 3557 . j - column indices 3558 - a - matrix values 3559 3560 Output Parameter: 3561 . mat - the matrix 3562 3563 Level: intermediate 3564 3565 Notes: 3566 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3567 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3568 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3569 3570 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3571 3572 The format which is used for the sparse matrix input, is equivalent to a 3573 row-major ordering.. i.e for the following matrix, the input data expected is 3574 as shown: 3575 3576 1 0 0 3577 2 0 3 P0 3578 ------- 3579 4 5 6 P1 3580 3581 Process0 [P0]: rows_owned=[0,1] 3582 i = {0,1,3} [size = nrow+1 = 2+1] 3583 j = {0,0,2} [size = nz = 6] 3584 v = {1,2,3} [size = nz = 6] 3585 3586 Process1 [P1]: rows_owned=[2] 3587 i = {0,3} [size = nrow+1 = 1+1] 3588 j = {0,1,2} [size = nz = 6] 3589 v = {4,5,6} [size = nz = 6] 3590 3591 .keywords: matrix, aij, compressed row, sparse, parallel 3592 3593 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3594 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3595 @*/ 3596 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3597 { 3598 PetscErrorCode ierr; 3599 3600 PetscFunctionBegin; 3601 if (i[0]) { 3602 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3603 } 3604 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3605 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3606 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3607 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3608 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3609 PetscFunctionReturn(0); 3610 } 3611 3612 #undef __FUNCT__ 3613 #define __FUNCT__ "MatCreateMPIAIJ" 3614 /*@C 3615 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3616 (the default parallel PETSc format). For good matrix assembly performance 3617 the user should preallocate the matrix storage by setting the parameters 3618 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3619 performance can be increased by more than a factor of 50. 3620 3621 Collective on MPI_Comm 3622 3623 Input Parameters: 3624 + comm - MPI communicator 3625 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3626 This value should be the same as the local size used in creating the 3627 y vector for the matrix-vector product y = Ax. 3628 . n - This value should be the same as the local size used in creating the 3629 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3630 calculated if N is given) For square matrices n is almost always m. 3631 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3632 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3633 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3634 (same value is used for all local rows) 3635 . d_nnz - array containing the number of nonzeros in the various rows of the 3636 DIAGONAL portion of the local submatrix (possibly different for each row) 3637 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3638 The size of this array is equal to the number of local rows, i.e 'm'. 3639 You must leave room for the diagonal entry even if it is zero. 3640 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3641 submatrix (same value is used for all local rows). 3642 - o_nnz - array containing the number of nonzeros in the various rows of the 3643 OFF-DIAGONAL portion of the local submatrix (possibly different for 3644 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3645 structure. The size of this array is equal to the number 3646 of local rows, i.e 'm'. 3647 3648 Output Parameter: 3649 . A - the matrix 3650 3651 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3652 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3653 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3654 3655 Notes: 3656 If the *_nnz parameter is given then the *_nz parameter is ignored 3657 3658 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3659 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3660 storage requirements for this matrix. 3661 3662 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3663 processor than it must be used on all processors that share the object for 3664 that argument. 3665 3666 The user MUST specify either the local or global matrix dimensions 3667 (possibly both). 3668 3669 The parallel matrix is partitioned across processors such that the 3670 first m0 rows belong to process 0, the next m1 rows belong to 3671 process 1, the next m2 rows belong to process 2 etc.. where 3672 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3673 values corresponding to [m x N] submatrix. 3674 3675 The columns are logically partitioned with the n0 columns belonging 3676 to 0th partition, the next n1 columns belonging to the next 3677 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3678 3679 The DIAGONAL portion of the local submatrix on any given processor 3680 is the submatrix corresponding to the rows and columns m,n 3681 corresponding to the given processor. i.e diagonal matrix on 3682 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3683 etc. The remaining portion of the local submatrix [m x (N-n)] 3684 constitute the OFF-DIAGONAL portion. The example below better 3685 illustrates this concept. 3686 3687 For a square global matrix we define each processor's diagonal portion 3688 to be its local rows and the corresponding columns (a square submatrix); 3689 each processor's off-diagonal portion encompasses the remainder of the 3690 local matrix (a rectangular submatrix). 3691 3692 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3693 3694 When calling this routine with a single process communicator, a matrix of 3695 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3696 type of communicator, use the construction mechanism: 3697 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3698 3699 By default, this format uses inodes (identical nodes) when possible. 3700 We search for consecutive rows with the same nonzero structure, thereby 3701 reusing matrix information to achieve increased efficiency. 3702 3703 Options Database Keys: 3704 + -mat_no_inode - Do not use inodes 3705 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3706 - -mat_aij_oneindex - Internally use indexing starting at 1 3707 rather than 0. Note that when calling MatSetValues(), 3708 the user still MUST index entries starting at 0! 3709 3710 3711 Example usage: 3712 3713 Consider the following 8x8 matrix with 34 non-zero values, that is 3714 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3715 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3716 as follows: 3717 3718 .vb 3719 1 2 0 | 0 3 0 | 0 4 3720 Proc0 0 5 6 | 7 0 0 | 8 0 3721 9 0 10 | 11 0 0 | 12 0 3722 ------------------------------------- 3723 13 0 14 | 15 16 17 | 0 0 3724 Proc1 0 18 0 | 19 20 21 | 0 0 3725 0 0 0 | 22 23 0 | 24 0 3726 ------------------------------------- 3727 Proc2 25 26 27 | 0 0 28 | 29 0 3728 30 0 0 | 31 32 33 | 0 34 3729 .ve 3730 3731 This can be represented as a collection of submatrices as: 3732 3733 .vb 3734 A B C 3735 D E F 3736 G H I 3737 .ve 3738 3739 Where the submatrices A,B,C are owned by proc0, D,E,F are 3740 owned by proc1, G,H,I are owned by proc2. 3741 3742 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3743 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3744 The 'M','N' parameters are 8,8, and have the same values on all procs. 3745 3746 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3747 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3748 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3749 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3750 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3751 matrix, ans [DF] as another SeqAIJ matrix. 3752 3753 When d_nz, o_nz parameters are specified, d_nz storage elements are 3754 allocated for every row of the local diagonal submatrix, and o_nz 3755 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3756 One way to choose d_nz and o_nz is to use the max nonzerors per local 3757 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3758 In this case, the values of d_nz,o_nz are: 3759 .vb 3760 proc0 : dnz = 2, o_nz = 2 3761 proc1 : dnz = 3, o_nz = 2 3762 proc2 : dnz = 1, o_nz = 4 3763 .ve 3764 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3765 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3766 for proc3. i.e we are using 12+15+10=37 storage locations to store 3767 34 values. 3768 3769 When d_nnz, o_nnz parameters are specified, the storage is specified 3770 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3771 In the above case the values for d_nnz,o_nnz are: 3772 .vb 3773 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3774 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3775 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3776 .ve 3777 Here the space allocated is sum of all the above values i.e 34, and 3778 hence pre-allocation is perfect. 3779 3780 Level: intermediate 3781 3782 .keywords: matrix, aij, compressed row, sparse, parallel 3783 3784 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3785 MPIAIJ, MatCreateMPIAIJWithArrays() 3786 @*/ 3787 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3788 { 3789 PetscErrorCode ierr; 3790 PetscMPIInt size; 3791 3792 PetscFunctionBegin; 3793 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3794 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3795 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3796 if (size > 1) { 3797 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3798 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3799 } else { 3800 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3801 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3802 } 3803 PetscFunctionReturn(0); 3804 } 3805 3806 #undef __FUNCT__ 3807 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3808 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3809 { 3810 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3811 3812 PetscFunctionBegin; 3813 *Ad = a->A; 3814 *Ao = a->B; 3815 *colmap = a->garray; 3816 PetscFunctionReturn(0); 3817 } 3818 3819 #undef __FUNCT__ 3820 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3821 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3822 { 3823 PetscErrorCode ierr; 3824 PetscInt i; 3825 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3826 3827 PetscFunctionBegin; 3828 if (coloring->ctype == IS_COLORING_GLOBAL) { 3829 ISColoringValue *allcolors,*colors; 3830 ISColoring ocoloring; 3831 3832 /* set coloring for diagonal portion */ 3833 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3834 3835 /* set coloring for off-diagonal portion */ 3836 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3837 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3838 for (i=0; i<a->B->cmap->n; i++) { 3839 colors[i] = allcolors[a->garray[i]]; 3840 } 3841 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3842 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3843 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3844 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3845 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3846 ISColoringValue *colors; 3847 PetscInt *larray; 3848 ISColoring ocoloring; 3849 3850 /* set coloring for diagonal portion */ 3851 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3852 for (i=0; i<a->A->cmap->n; i++) { 3853 larray[i] = i + A->cmap->rstart; 3854 } 3855 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3856 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3857 for (i=0; i<a->A->cmap->n; i++) { 3858 colors[i] = coloring->colors[larray[i]]; 3859 } 3860 ierr = PetscFree(larray);CHKERRQ(ierr); 3861 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3862 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3863 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3864 3865 /* set coloring for off-diagonal portion */ 3866 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3867 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3868 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3869 for (i=0; i<a->B->cmap->n; i++) { 3870 colors[i] = coloring->colors[larray[i]]; 3871 } 3872 ierr = PetscFree(larray);CHKERRQ(ierr); 3873 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3874 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3875 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3876 } else { 3877 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3878 } 3879 3880 PetscFunctionReturn(0); 3881 } 3882 3883 #if defined(PETSC_HAVE_ADIC) 3884 #undef __FUNCT__ 3885 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3886 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3887 { 3888 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3889 PetscErrorCode ierr; 3890 3891 PetscFunctionBegin; 3892 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3893 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3894 PetscFunctionReturn(0); 3895 } 3896 #endif 3897 3898 #undef __FUNCT__ 3899 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3900 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3901 { 3902 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3903 PetscErrorCode ierr; 3904 3905 PetscFunctionBegin; 3906 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3907 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3908 PetscFunctionReturn(0); 3909 } 3910 3911 #undef __FUNCT__ 3912 #define __FUNCT__ "MatMerge" 3913 /*@ 3914 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3915 matrices from each processor 3916 3917 Collective on MPI_Comm 3918 3919 Input Parameters: 3920 + comm - the communicators the parallel matrix will live on 3921 . inmat - the input sequential matrices 3922 . n - number of local columns (or PETSC_DECIDE) 3923 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3924 3925 Output Parameter: 3926 . outmat - the parallel matrix generated 3927 3928 Level: advanced 3929 3930 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3931 3932 @*/ 3933 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3934 { 3935 PetscErrorCode ierr; 3936 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3937 PetscInt *indx; 3938 PetscScalar *values; 3939 3940 PetscFunctionBegin; 3941 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3942 if (scall == MAT_INITIAL_MATRIX){ 3943 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3944 if (n == PETSC_DECIDE){ 3945 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3946 } 3947 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3948 rstart -= m; 3949 3950 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3951 for (i=0;i<m;i++) { 3952 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3953 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3954 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3955 } 3956 /* This routine will ONLY return MPIAIJ type matrix */ 3957 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3958 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3959 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3960 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3961 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3962 3963 } else if (scall == MAT_REUSE_MATRIX){ 3964 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3965 } else { 3966 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3967 } 3968 3969 for (i=0;i<m;i++) { 3970 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3971 Ii = i + rstart; 3972 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3973 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3974 } 3975 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3976 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3977 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3978 3979 PetscFunctionReturn(0); 3980 } 3981 3982 #undef __FUNCT__ 3983 #define __FUNCT__ "MatFileSplit" 3984 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3985 { 3986 PetscErrorCode ierr; 3987 PetscMPIInt rank; 3988 PetscInt m,N,i,rstart,nnz; 3989 size_t len; 3990 const PetscInt *indx; 3991 PetscViewer out; 3992 char *name; 3993 Mat B; 3994 const PetscScalar *values; 3995 3996 PetscFunctionBegin; 3997 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3998 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3999 /* Should this be the type of the diagonal block of A? */ 4000 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4001 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4002 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4003 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4004 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4005 for (i=0;i<m;i++) { 4006 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4007 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4008 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4009 } 4010 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4011 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4012 4013 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4014 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4015 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4016 sprintf(name,"%s.%d",outfile,rank); 4017 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4018 ierr = PetscFree(name); 4019 ierr = MatView(B,out);CHKERRQ(ierr); 4020 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 4021 ierr = MatDestroy(B);CHKERRQ(ierr); 4022 PetscFunctionReturn(0); 4023 } 4024 4025 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 4026 #undef __FUNCT__ 4027 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4028 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4029 { 4030 PetscErrorCode ierr; 4031 Mat_Merge_SeqsToMPI *merge; 4032 PetscContainer container; 4033 4034 PetscFunctionBegin; 4035 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4036 if (container) { 4037 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4038 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4039 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4040 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4041 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4042 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4043 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4044 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4045 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4046 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4047 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4048 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4049 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4050 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4051 4052 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4053 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4054 } 4055 ierr = PetscFree(merge);CHKERRQ(ierr); 4056 4057 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4058 PetscFunctionReturn(0); 4059 } 4060 4061 #include "../src/mat/utils/freespace.h" 4062 #include "petscbt.h" 4063 4064 #undef __FUNCT__ 4065 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4066 /*@C 4067 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4068 matrices from each processor 4069 4070 Collective on MPI_Comm 4071 4072 Input Parameters: 4073 + comm - the communicators the parallel matrix will live on 4074 . seqmat - the input sequential matrices 4075 . m - number of local rows (or PETSC_DECIDE) 4076 . n - number of local columns (or PETSC_DECIDE) 4077 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4078 4079 Output Parameter: 4080 . mpimat - the parallel matrix generated 4081 4082 Level: advanced 4083 4084 Notes: 4085 The dimensions of the sequential matrix in each processor MUST be the same. 4086 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4087 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4088 @*/ 4089 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4090 { 4091 PetscErrorCode ierr; 4092 MPI_Comm comm=((PetscObject)mpimat)->comm; 4093 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4094 PetscMPIInt size,rank,taga,*len_s; 4095 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4096 PetscInt proc,m; 4097 PetscInt **buf_ri,**buf_rj; 4098 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4099 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4100 MPI_Request *s_waits,*r_waits; 4101 MPI_Status *status; 4102 MatScalar *aa=a->a; 4103 MatScalar **abuf_r,*ba_i; 4104 Mat_Merge_SeqsToMPI *merge; 4105 PetscContainer container; 4106 4107 PetscFunctionBegin; 4108 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4109 4110 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4111 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4112 4113 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4114 if (container) { 4115 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4116 } 4117 bi = merge->bi; 4118 bj = merge->bj; 4119 buf_ri = merge->buf_ri; 4120 buf_rj = merge->buf_rj; 4121 4122 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4123 owners = merge->rowmap->range; 4124 len_s = merge->len_s; 4125 4126 /* send and recv matrix values */ 4127 /*-----------------------------*/ 4128 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4129 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4130 4131 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4132 for (proc=0,k=0; proc<size; proc++){ 4133 if (!len_s[proc]) continue; 4134 i = owners[proc]; 4135 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4136 k++; 4137 } 4138 4139 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4140 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4141 ierr = PetscFree(status);CHKERRQ(ierr); 4142 4143 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4144 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4145 4146 /* insert mat values of mpimat */ 4147 /*----------------------------*/ 4148 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4149 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4150 4151 for (k=0; k<merge->nrecv; k++){ 4152 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4153 nrows = *(buf_ri_k[k]); 4154 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4155 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4156 } 4157 4158 /* set values of ba */ 4159 m = merge->rowmap->n; 4160 for (i=0; i<m; i++) { 4161 arow = owners[rank] + i; 4162 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4163 bnzi = bi[i+1] - bi[i]; 4164 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4165 4166 /* add local non-zero vals of this proc's seqmat into ba */ 4167 anzi = ai[arow+1] - ai[arow]; 4168 aj = a->j + ai[arow]; 4169 aa = a->a + ai[arow]; 4170 nextaj = 0; 4171 for (j=0; nextaj<anzi; j++){ 4172 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4173 ba_i[j] += aa[nextaj++]; 4174 } 4175 } 4176 4177 /* add received vals into ba */ 4178 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4179 /* i-th row */ 4180 if (i == *nextrow[k]) { 4181 anzi = *(nextai[k]+1) - *nextai[k]; 4182 aj = buf_rj[k] + *(nextai[k]); 4183 aa = abuf_r[k] + *(nextai[k]); 4184 nextaj = 0; 4185 for (j=0; nextaj<anzi; j++){ 4186 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4187 ba_i[j] += aa[nextaj++]; 4188 } 4189 } 4190 nextrow[k]++; nextai[k]++; 4191 } 4192 } 4193 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4194 } 4195 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4196 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4197 4198 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4199 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4200 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4201 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4202 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4203 PetscFunctionReturn(0); 4204 } 4205 4206 #undef __FUNCT__ 4207 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4208 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4209 { 4210 PetscErrorCode ierr; 4211 Mat B_mpi; 4212 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4213 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4214 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4215 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4216 PetscInt len,proc,*dnz,*onz; 4217 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4218 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4219 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4220 MPI_Status *status; 4221 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4222 PetscBT lnkbt; 4223 Mat_Merge_SeqsToMPI *merge; 4224 PetscContainer container; 4225 4226 PetscFunctionBegin; 4227 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4228 4229 /* make sure it is a PETSc comm */ 4230 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4231 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4232 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4233 4234 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4235 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4236 4237 /* determine row ownership */ 4238 /*---------------------------------------------------------*/ 4239 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4240 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4241 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4242 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4243 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4244 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4245 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4246 4247 m = merge->rowmap->n; 4248 M = merge->rowmap->N; 4249 owners = merge->rowmap->range; 4250 4251 /* determine the number of messages to send, their lengths */ 4252 /*---------------------------------------------------------*/ 4253 len_s = merge->len_s; 4254 4255 len = 0; /* length of buf_si[] */ 4256 merge->nsend = 0; 4257 for (proc=0; proc<size; proc++){ 4258 len_si[proc] = 0; 4259 if (proc == rank){ 4260 len_s[proc] = 0; 4261 } else { 4262 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4263 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4264 } 4265 if (len_s[proc]) { 4266 merge->nsend++; 4267 nrows = 0; 4268 for (i=owners[proc]; i<owners[proc+1]; i++){ 4269 if (ai[i+1] > ai[i]) nrows++; 4270 } 4271 len_si[proc] = 2*(nrows+1); 4272 len += len_si[proc]; 4273 } 4274 } 4275 4276 /* determine the number and length of messages to receive for ij-structure */ 4277 /*-------------------------------------------------------------------------*/ 4278 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4279 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4280 4281 /* post the Irecv of j-structure */ 4282 /*-------------------------------*/ 4283 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4284 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4285 4286 /* post the Isend of j-structure */ 4287 /*--------------------------------*/ 4288 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4289 4290 for (proc=0, k=0; proc<size; proc++){ 4291 if (!len_s[proc]) continue; 4292 i = owners[proc]; 4293 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4294 k++; 4295 } 4296 4297 /* receives and sends of j-structure are complete */ 4298 /*------------------------------------------------*/ 4299 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4300 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4301 4302 /* send and recv i-structure */ 4303 /*---------------------------*/ 4304 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4305 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4306 4307 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4308 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4309 for (proc=0,k=0; proc<size; proc++){ 4310 if (!len_s[proc]) continue; 4311 /* form outgoing message for i-structure: 4312 buf_si[0]: nrows to be sent 4313 [1:nrows]: row index (global) 4314 [nrows+1:2*nrows+1]: i-structure index 4315 */ 4316 /*-------------------------------------------*/ 4317 nrows = len_si[proc]/2 - 1; 4318 buf_si_i = buf_si + nrows+1; 4319 buf_si[0] = nrows; 4320 buf_si_i[0] = 0; 4321 nrows = 0; 4322 for (i=owners[proc]; i<owners[proc+1]; i++){ 4323 anzi = ai[i+1] - ai[i]; 4324 if (anzi) { 4325 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4326 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4327 nrows++; 4328 } 4329 } 4330 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4331 k++; 4332 buf_si += len_si[proc]; 4333 } 4334 4335 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4336 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4337 4338 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4339 for (i=0; i<merge->nrecv; i++){ 4340 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4341 } 4342 4343 ierr = PetscFree(len_si);CHKERRQ(ierr); 4344 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4345 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4346 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4347 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4348 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4349 ierr = PetscFree(status);CHKERRQ(ierr); 4350 4351 /* compute a local seq matrix in each processor */ 4352 /*----------------------------------------------*/ 4353 /* allocate bi array and free space for accumulating nonzero column info */ 4354 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4355 bi[0] = 0; 4356 4357 /* create and initialize a linked list */ 4358 nlnk = N+1; 4359 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4360 4361 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4362 len = 0; 4363 len = ai[owners[rank+1]] - ai[owners[rank]]; 4364 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4365 current_space = free_space; 4366 4367 /* determine symbolic info for each local row */ 4368 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4369 4370 for (k=0; k<merge->nrecv; k++){ 4371 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4372 nrows = *buf_ri_k[k]; 4373 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4374 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4375 } 4376 4377 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4378 len = 0; 4379 for (i=0;i<m;i++) { 4380 bnzi = 0; 4381 /* add local non-zero cols of this proc's seqmat into lnk */ 4382 arow = owners[rank] + i; 4383 anzi = ai[arow+1] - ai[arow]; 4384 aj = a->j + ai[arow]; 4385 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4386 bnzi += nlnk; 4387 /* add received col data into lnk */ 4388 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4389 if (i == *nextrow[k]) { /* i-th row */ 4390 anzi = *(nextai[k]+1) - *nextai[k]; 4391 aj = buf_rj[k] + *nextai[k]; 4392 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4393 bnzi += nlnk; 4394 nextrow[k]++; nextai[k]++; 4395 } 4396 } 4397 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4398 4399 /* if free space is not available, make more free space */ 4400 if (current_space->local_remaining<bnzi) { 4401 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4402 nspacedouble++; 4403 } 4404 /* copy data into free space, then initialize lnk */ 4405 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4406 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4407 4408 current_space->array += bnzi; 4409 current_space->local_used += bnzi; 4410 current_space->local_remaining -= bnzi; 4411 4412 bi[i+1] = bi[i] + bnzi; 4413 } 4414 4415 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4416 4417 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4418 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4419 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4420 4421 /* create symbolic parallel matrix B_mpi */ 4422 /*---------------------------------------*/ 4423 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4424 if (n==PETSC_DECIDE) { 4425 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4426 } else { 4427 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4428 } 4429 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4430 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4431 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4432 4433 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4434 B_mpi->assembled = PETSC_FALSE; 4435 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4436 merge->bi = bi; 4437 merge->bj = bj; 4438 merge->buf_ri = buf_ri; 4439 merge->buf_rj = buf_rj; 4440 merge->coi = PETSC_NULL; 4441 merge->coj = PETSC_NULL; 4442 merge->owners_co = PETSC_NULL; 4443 4444 /* attach the supporting struct to B_mpi for reuse */ 4445 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4446 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4447 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4448 *mpimat = B_mpi; 4449 4450 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4451 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4452 PetscFunctionReturn(0); 4453 } 4454 4455 #undef __FUNCT__ 4456 #define __FUNCT__ "MatMerge_SeqsToMPI" 4457 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4458 { 4459 PetscErrorCode ierr; 4460 4461 PetscFunctionBegin; 4462 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4463 if (scall == MAT_INITIAL_MATRIX){ 4464 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4465 } 4466 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4467 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4468 PetscFunctionReturn(0); 4469 } 4470 4471 #undef __FUNCT__ 4472 #define __FUNCT__ "MatGetLocalMat" 4473 /*@ 4474 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4475 4476 Not Collective 4477 4478 Input Parameters: 4479 + A - the matrix 4480 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4481 4482 Output Parameter: 4483 . A_loc - the local sequential matrix generated 4484 4485 Level: developer 4486 4487 @*/ 4488 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4489 { 4490 PetscErrorCode ierr; 4491 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4492 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4493 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4494 MatScalar *aa=a->a,*ba=b->a,*cam; 4495 PetscScalar *ca; 4496 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4497 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4498 4499 PetscFunctionBegin; 4500 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4501 if (scall == MAT_INITIAL_MATRIX){ 4502 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4503 ci[0] = 0; 4504 for (i=0; i<am; i++){ 4505 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4506 } 4507 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4508 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4509 k = 0; 4510 for (i=0; i<am; i++) { 4511 ncols_o = bi[i+1] - bi[i]; 4512 ncols_d = ai[i+1] - ai[i]; 4513 /* off-diagonal portion of A */ 4514 for (jo=0; jo<ncols_o; jo++) { 4515 col = cmap[*bj]; 4516 if (col >= cstart) break; 4517 cj[k] = col; bj++; 4518 ca[k++] = *ba++; 4519 } 4520 /* diagonal portion of A */ 4521 for (j=0; j<ncols_d; j++) { 4522 cj[k] = cstart + *aj++; 4523 ca[k++] = *aa++; 4524 } 4525 /* off-diagonal portion of A */ 4526 for (j=jo; j<ncols_o; j++) { 4527 cj[k] = cmap[*bj++]; 4528 ca[k++] = *ba++; 4529 } 4530 } 4531 /* put together the new matrix */ 4532 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4533 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4534 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4535 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4536 mat->free_a = PETSC_TRUE; 4537 mat->free_ij = PETSC_TRUE; 4538 mat->nonew = 0; 4539 } else if (scall == MAT_REUSE_MATRIX){ 4540 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4541 ci = mat->i; cj = mat->j; cam = mat->a; 4542 for (i=0; i<am; i++) { 4543 /* off-diagonal portion of A */ 4544 ncols_o = bi[i+1] - bi[i]; 4545 for (jo=0; jo<ncols_o; jo++) { 4546 col = cmap[*bj]; 4547 if (col >= cstart) break; 4548 *cam++ = *ba++; bj++; 4549 } 4550 /* diagonal portion of A */ 4551 ncols_d = ai[i+1] - ai[i]; 4552 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4553 /* off-diagonal portion of A */ 4554 for (j=jo; j<ncols_o; j++) { 4555 *cam++ = *ba++; bj++; 4556 } 4557 } 4558 } else { 4559 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4560 } 4561 4562 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4563 PetscFunctionReturn(0); 4564 } 4565 4566 #undef __FUNCT__ 4567 #define __FUNCT__ "MatGetLocalMatCondensed" 4568 /*@C 4569 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4570 4571 Not Collective 4572 4573 Input Parameters: 4574 + A - the matrix 4575 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4576 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4577 4578 Output Parameter: 4579 . A_loc - the local sequential matrix generated 4580 4581 Level: developer 4582 4583 @*/ 4584 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4585 { 4586 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4587 PetscErrorCode ierr; 4588 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4589 IS isrowa,iscola; 4590 Mat *aloc; 4591 4592 PetscFunctionBegin; 4593 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4594 if (!row){ 4595 start = A->rmap->rstart; end = A->rmap->rend; 4596 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4597 } else { 4598 isrowa = *row; 4599 } 4600 if (!col){ 4601 start = A->cmap->rstart; 4602 cmap = a->garray; 4603 nzA = a->A->cmap->n; 4604 nzB = a->B->cmap->n; 4605 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4606 ncols = 0; 4607 for (i=0; i<nzB; i++) { 4608 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4609 else break; 4610 } 4611 imark = i; 4612 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4613 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4614 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4615 ierr = PetscFree(idx);CHKERRQ(ierr); 4616 } else { 4617 iscola = *col; 4618 } 4619 if (scall != MAT_INITIAL_MATRIX){ 4620 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4621 aloc[0] = *A_loc; 4622 } 4623 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4624 *A_loc = aloc[0]; 4625 ierr = PetscFree(aloc);CHKERRQ(ierr); 4626 if (!row){ 4627 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4628 } 4629 if (!col){ 4630 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4631 } 4632 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4633 PetscFunctionReturn(0); 4634 } 4635 4636 #undef __FUNCT__ 4637 #define __FUNCT__ "MatGetBrowsOfAcols" 4638 /*@C 4639 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4640 4641 Collective on Mat 4642 4643 Input Parameters: 4644 + A,B - the matrices in mpiaij format 4645 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4646 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4647 4648 Output Parameter: 4649 + rowb, colb - index sets of rows and columns of B to extract 4650 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4651 - B_seq - the sequential matrix generated 4652 4653 Level: developer 4654 4655 @*/ 4656 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4657 { 4658 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4659 PetscErrorCode ierr; 4660 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4661 IS isrowb,iscolb; 4662 Mat *bseq; 4663 4664 PetscFunctionBegin; 4665 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4666 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4667 } 4668 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4669 4670 if (scall == MAT_INITIAL_MATRIX){ 4671 start = A->cmap->rstart; 4672 cmap = a->garray; 4673 nzA = a->A->cmap->n; 4674 nzB = a->B->cmap->n; 4675 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4676 ncols = 0; 4677 for (i=0; i<nzB; i++) { /* row < local row index */ 4678 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4679 else break; 4680 } 4681 imark = i; 4682 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4683 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4684 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4685 ierr = PetscFree(idx);CHKERRQ(ierr); 4686 *brstart = imark; 4687 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4688 } else { 4689 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4690 isrowb = *rowb; iscolb = *colb; 4691 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4692 bseq[0] = *B_seq; 4693 } 4694 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4695 *B_seq = bseq[0]; 4696 ierr = PetscFree(bseq);CHKERRQ(ierr); 4697 if (!rowb){ 4698 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4699 } else { 4700 *rowb = isrowb; 4701 } 4702 if (!colb){ 4703 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4704 } else { 4705 *colb = iscolb; 4706 } 4707 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4708 PetscFunctionReturn(0); 4709 } 4710 4711 #undef __FUNCT__ 4712 #define __FUNCT__ "MatGetBrowsOfAoCols" 4713 /*@C 4714 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4715 of the OFF-DIAGONAL portion of local A 4716 4717 Collective on Mat 4718 4719 Input Parameters: 4720 + A,B - the matrices in mpiaij format 4721 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4722 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4723 . startsj_r - similar to startsj for receives 4724 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4725 4726 Output Parameter: 4727 + B_oth - the sequential matrix generated 4728 4729 Level: developer 4730 4731 @*/ 4732 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4733 { 4734 VecScatter_MPI_General *gen_to,*gen_from; 4735 PetscErrorCode ierr; 4736 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4737 Mat_SeqAIJ *b_oth; 4738 VecScatter ctx=a->Mvctx; 4739 MPI_Comm comm=((PetscObject)ctx)->comm; 4740 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4741 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4742 PetscScalar *rvalues,*svalues; 4743 MatScalar *b_otha,*bufa,*bufA; 4744 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4745 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4746 MPI_Status *sstatus,rstatus; 4747 PetscMPIInt jj; 4748 PetscInt *cols,sbs,rbs; 4749 PetscScalar *vals; 4750 4751 PetscFunctionBegin; 4752 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4753 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4754 } 4755 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4756 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4757 4758 gen_to = (VecScatter_MPI_General*)ctx->todata; 4759 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4760 rvalues = gen_from->values; /* holds the length of receiving row */ 4761 svalues = gen_to->values; /* holds the length of sending row */ 4762 nrecvs = gen_from->n; 4763 nsends = gen_to->n; 4764 4765 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4766 srow = gen_to->indices; /* local row index to be sent */ 4767 sstarts = gen_to->starts; 4768 sprocs = gen_to->procs; 4769 sstatus = gen_to->sstatus; 4770 sbs = gen_to->bs; 4771 rstarts = gen_from->starts; 4772 rprocs = gen_from->procs; 4773 rbs = gen_from->bs; 4774 4775 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4776 if (scall == MAT_INITIAL_MATRIX){ 4777 /* i-array */ 4778 /*---------*/ 4779 /* post receives */ 4780 for (i=0; i<nrecvs; i++){ 4781 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4782 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4783 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4784 } 4785 4786 /* pack the outgoing message */ 4787 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4788 sstartsj[0] = 0; rstartsj[0] = 0; 4789 len = 0; /* total length of j or a array to be sent */ 4790 k = 0; 4791 for (i=0; i<nsends; i++){ 4792 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4793 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4794 for (j=0; j<nrows; j++) { 4795 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4796 for (l=0; l<sbs; l++){ 4797 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4798 rowlen[j*sbs+l] = ncols; 4799 len += ncols; 4800 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4801 } 4802 k++; 4803 } 4804 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4805 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4806 } 4807 /* recvs and sends of i-array are completed */ 4808 i = nrecvs; 4809 while (i--) { 4810 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4811 } 4812 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4813 4814 /* allocate buffers for sending j and a arrays */ 4815 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4816 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4817 4818 /* create i-array of B_oth */ 4819 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4820 b_othi[0] = 0; 4821 len = 0; /* total length of j or a array to be received */ 4822 k = 0; 4823 for (i=0; i<nrecvs; i++){ 4824 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4825 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4826 for (j=0; j<nrows; j++) { 4827 b_othi[k+1] = b_othi[k] + rowlen[j]; 4828 len += rowlen[j]; k++; 4829 } 4830 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4831 } 4832 4833 /* allocate space for j and a arrrays of B_oth */ 4834 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4835 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4836 4837 /* j-array */ 4838 /*---------*/ 4839 /* post receives of j-array */ 4840 for (i=0; i<nrecvs; i++){ 4841 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4842 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4843 } 4844 4845 /* pack the outgoing message j-array */ 4846 k = 0; 4847 for (i=0; i<nsends; i++){ 4848 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4849 bufJ = bufj+sstartsj[i]; 4850 for (j=0; j<nrows; j++) { 4851 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4852 for (ll=0; ll<sbs; ll++){ 4853 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4854 for (l=0; l<ncols; l++){ 4855 *bufJ++ = cols[l]; 4856 } 4857 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4858 } 4859 } 4860 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4861 } 4862 4863 /* recvs and sends of j-array are completed */ 4864 i = nrecvs; 4865 while (i--) { 4866 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4867 } 4868 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4869 } else if (scall == MAT_REUSE_MATRIX){ 4870 sstartsj = *startsj; 4871 rstartsj = *startsj_r; 4872 bufa = *bufa_ptr; 4873 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4874 b_otha = b_oth->a; 4875 } else { 4876 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4877 } 4878 4879 /* a-array */ 4880 /*---------*/ 4881 /* post receives of a-array */ 4882 for (i=0; i<nrecvs; i++){ 4883 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4884 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4885 } 4886 4887 /* pack the outgoing message a-array */ 4888 k = 0; 4889 for (i=0; i<nsends; i++){ 4890 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4891 bufA = bufa+sstartsj[i]; 4892 for (j=0; j<nrows; j++) { 4893 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4894 for (ll=0; ll<sbs; ll++){ 4895 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4896 for (l=0; l<ncols; l++){ 4897 *bufA++ = vals[l]; 4898 } 4899 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4900 } 4901 } 4902 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4903 } 4904 /* recvs and sends of a-array are completed */ 4905 i = nrecvs; 4906 while (i--) { 4907 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4908 } 4909 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4910 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4911 4912 if (scall == MAT_INITIAL_MATRIX){ 4913 /* put together the new matrix */ 4914 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4915 4916 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4917 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4918 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4919 b_oth->free_a = PETSC_TRUE; 4920 b_oth->free_ij = PETSC_TRUE; 4921 b_oth->nonew = 0; 4922 4923 ierr = PetscFree(bufj);CHKERRQ(ierr); 4924 if (!startsj || !bufa_ptr){ 4925 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4926 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4927 } else { 4928 *startsj = sstartsj; 4929 *startsj_r = rstartsj; 4930 *bufa_ptr = bufa; 4931 } 4932 } 4933 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4934 PetscFunctionReturn(0); 4935 } 4936 4937 #undef __FUNCT__ 4938 #define __FUNCT__ "MatGetCommunicationStructs" 4939 /*@C 4940 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4941 4942 Not Collective 4943 4944 Input Parameters: 4945 . A - The matrix in mpiaij format 4946 4947 Output Parameter: 4948 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4949 . colmap - A map from global column index to local index into lvec 4950 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4951 4952 Level: developer 4953 4954 @*/ 4955 #if defined (PETSC_USE_CTABLE) 4956 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4957 #else 4958 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4959 #endif 4960 { 4961 Mat_MPIAIJ *a; 4962 4963 PetscFunctionBegin; 4964 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4965 PetscValidPointer(lvec, 2); 4966 PetscValidPointer(colmap, 3); 4967 PetscValidPointer(multScatter, 4); 4968 a = (Mat_MPIAIJ *) A->data; 4969 if (lvec) *lvec = a->lvec; 4970 if (colmap) *colmap = a->colmap; 4971 if (multScatter) *multScatter = a->Mvctx; 4972 PetscFunctionReturn(0); 4973 } 4974 4975 EXTERN_C_BEGIN 4976 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 4977 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 4978 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 4979 EXTERN_C_END 4980 4981 #undef __FUNCT__ 4982 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4983 /* 4984 Computes (B'*A')' since computing B*A directly is untenable 4985 4986 n p p 4987 ( ) ( ) ( ) 4988 m ( A ) * n ( B ) = m ( C ) 4989 ( ) ( ) ( ) 4990 4991 */ 4992 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4993 { 4994 PetscErrorCode ierr; 4995 Mat At,Bt,Ct; 4996 4997 PetscFunctionBegin; 4998 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4999 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5000 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5001 ierr = MatDestroy(At);CHKERRQ(ierr); 5002 ierr = MatDestroy(Bt);CHKERRQ(ierr); 5003 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5004 ierr = MatDestroy(Ct);CHKERRQ(ierr); 5005 PetscFunctionReturn(0); 5006 } 5007 5008 #undef __FUNCT__ 5009 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5010 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5011 { 5012 PetscErrorCode ierr; 5013 PetscInt m=A->rmap->n,n=B->cmap->n; 5014 Mat Cmat; 5015 5016 PetscFunctionBegin; 5017 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5018 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5019 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5020 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5021 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5022 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5023 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5024 *C = Cmat; 5025 PetscFunctionReturn(0); 5026 } 5027 5028 /* ----------------------------------------------------------------*/ 5029 #undef __FUNCT__ 5030 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5031 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5032 { 5033 PetscErrorCode ierr; 5034 5035 PetscFunctionBegin; 5036 if (scall == MAT_INITIAL_MATRIX){ 5037 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5038 } 5039 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5040 PetscFunctionReturn(0); 5041 } 5042 5043 EXTERN_C_BEGIN 5044 #if defined(PETSC_HAVE_MUMPS) 5045 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5046 #endif 5047 #if defined(PETSC_HAVE_PASTIX) 5048 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5049 #endif 5050 #if defined(PETSC_HAVE_SUPERLU_DIST) 5051 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5052 #endif 5053 #if defined(PETSC_HAVE_SPOOLES) 5054 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5055 #endif 5056 EXTERN_C_END 5057 5058 /*MC 5059 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5060 5061 Options Database Keys: 5062 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5063 5064 Level: beginner 5065 5066 .seealso: MatCreateMPIAIJ() 5067 M*/ 5068 5069 EXTERN_C_BEGIN 5070 #undef __FUNCT__ 5071 #define __FUNCT__ "MatCreate_MPIAIJ" 5072 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5073 { 5074 Mat_MPIAIJ *b; 5075 PetscErrorCode ierr; 5076 PetscMPIInt size; 5077 5078 PetscFunctionBegin; 5079 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5080 5081 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5082 B->data = (void*)b; 5083 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5084 B->rmap->bs = 1; 5085 B->assembled = PETSC_FALSE; 5086 B->mapping = 0; 5087 5088 B->insertmode = NOT_SET_VALUES; 5089 b->size = size; 5090 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5091 5092 /* build cache for off array entries formed */ 5093 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5094 b->donotstash = PETSC_FALSE; 5095 b->colmap = 0; 5096 b->garray = 0; 5097 b->roworiented = PETSC_TRUE; 5098 5099 /* stuff used for matrix vector multiply */ 5100 b->lvec = PETSC_NULL; 5101 b->Mvctx = PETSC_NULL; 5102 5103 /* stuff for MatGetRow() */ 5104 b->rowindices = 0; 5105 b->rowvalues = 0; 5106 b->getrowactive = PETSC_FALSE; 5107 5108 #if defined(PETSC_HAVE_SPOOLES) 5109 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5110 "MatGetFactor_mpiaij_spooles", 5111 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5112 #endif 5113 #if defined(PETSC_HAVE_MUMPS) 5114 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5115 "MatGetFactor_aij_mumps", 5116 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5117 #endif 5118 #if defined(PETSC_HAVE_PASTIX) 5119 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5120 "MatGetFactor_mpiaij_pastix", 5121 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5122 #endif 5123 #if defined(PETSC_HAVE_SUPERLU_DIST) 5124 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5125 "MatGetFactor_mpiaij_superlu_dist", 5126 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5127 #endif 5128 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5129 "MatStoreValues_MPIAIJ", 5130 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5131 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5132 "MatRetrieveValues_MPIAIJ", 5133 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5134 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5135 "MatGetDiagonalBlock_MPIAIJ", 5136 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5137 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5138 "MatIsTranspose_MPIAIJ", 5139 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5140 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5141 "MatMPIAIJSetPreallocation_MPIAIJ", 5142 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5143 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5144 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5145 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5146 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5147 "MatDiagonalScaleLocal_MPIAIJ", 5148 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5149 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5150 "MatConvert_MPIAIJ_MPIAIJPERM", 5151 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5152 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5153 "MatConvert_MPIAIJ_MPIAIJCRL", 5154 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5155 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5156 "MatConvert_MPIAIJ_MPISBAIJ", 5157 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5158 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5159 "MatMatMult_MPIDense_MPIAIJ", 5160 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5161 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5162 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5163 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5164 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5165 "MatMatMultNumeric_MPIDense_MPIAIJ", 5166 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5167 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5168 PetscFunctionReturn(0); 5169 } 5170 EXTERN_C_END 5171 5172 #undef __FUNCT__ 5173 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5174 /*@ 5175 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5176 and "off-diagonal" part of the matrix in CSR format. 5177 5178 Collective on MPI_Comm 5179 5180 Input Parameters: 5181 + comm - MPI communicator 5182 . m - number of local rows (Cannot be PETSC_DECIDE) 5183 . n - This value should be the same as the local size used in creating the 5184 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5185 calculated if N is given) For square matrices n is almost always m. 5186 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5187 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5188 . i - row indices for "diagonal" portion of matrix 5189 . j - column indices 5190 . a - matrix values 5191 . oi - row indices for "off-diagonal" portion of matrix 5192 . oj - column indices 5193 - oa - matrix values 5194 5195 Output Parameter: 5196 . mat - the matrix 5197 5198 Level: advanced 5199 5200 Notes: 5201 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5202 5203 The i and j indices are 0 based 5204 5205 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5206 5207 This sets local rows and cannot be used to set off-processor values. 5208 5209 You cannot later use MatSetValues() to change values in this matrix. 5210 5211 .keywords: matrix, aij, compressed row, sparse, parallel 5212 5213 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5214 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5215 @*/ 5216 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5217 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5218 { 5219 PetscErrorCode ierr; 5220 Mat_MPIAIJ *maij; 5221 5222 PetscFunctionBegin; 5223 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5224 if (i[0]) { 5225 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5226 } 5227 if (oi[0]) { 5228 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5229 } 5230 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5231 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5232 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5233 maij = (Mat_MPIAIJ*) (*mat)->data; 5234 maij->donotstash = PETSC_TRUE; 5235 (*mat)->preallocated = PETSC_TRUE; 5236 5237 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5238 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5239 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5240 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5241 5242 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5243 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5244 5245 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5246 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5247 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5248 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5249 5250 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5251 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5252 PetscFunctionReturn(0); 5253 } 5254 5255 /* 5256 Special version for direct calls from Fortran 5257 */ 5258 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5259 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5260 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5261 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5262 #endif 5263 5264 /* Change these macros so can be used in void function */ 5265 #undef CHKERRQ 5266 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5267 #undef SETERRQ2 5268 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5269 #undef SETERRQ 5270 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5271 5272 EXTERN_C_BEGIN 5273 #undef __FUNCT__ 5274 #define __FUNCT__ "matsetvaluesmpiaij_" 5275 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5276 { 5277 Mat mat = *mmat; 5278 PetscInt m = *mm, n = *mn; 5279 InsertMode addv = *maddv; 5280 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5281 PetscScalar value; 5282 PetscErrorCode ierr; 5283 5284 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5285 if (mat->insertmode == NOT_SET_VALUES) { 5286 mat->insertmode = addv; 5287 } 5288 #if defined(PETSC_USE_DEBUG) 5289 else if (mat->insertmode != addv) { 5290 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5291 } 5292 #endif 5293 { 5294 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5295 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5296 PetscTruth roworiented = aij->roworiented; 5297 5298 /* Some Variables required in the macro */ 5299 Mat A = aij->A; 5300 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5301 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5302 MatScalar *aa = a->a; 5303 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5304 Mat B = aij->B; 5305 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5306 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5307 MatScalar *ba = b->a; 5308 5309 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5310 PetscInt nonew = a->nonew; 5311 MatScalar *ap1,*ap2; 5312 5313 PetscFunctionBegin; 5314 for (i=0; i<m; i++) { 5315 if (im[i] < 0) continue; 5316 #if defined(PETSC_USE_DEBUG) 5317 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5318 #endif 5319 if (im[i] >= rstart && im[i] < rend) { 5320 row = im[i] - rstart; 5321 lastcol1 = -1; 5322 rp1 = aj + ai[row]; 5323 ap1 = aa + ai[row]; 5324 rmax1 = aimax[row]; 5325 nrow1 = ailen[row]; 5326 low1 = 0; 5327 high1 = nrow1; 5328 lastcol2 = -1; 5329 rp2 = bj + bi[row]; 5330 ap2 = ba + bi[row]; 5331 rmax2 = bimax[row]; 5332 nrow2 = bilen[row]; 5333 low2 = 0; 5334 high2 = nrow2; 5335 5336 for (j=0; j<n; j++) { 5337 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5338 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5339 if (in[j] >= cstart && in[j] < cend){ 5340 col = in[j] - cstart; 5341 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5342 } else if (in[j] < 0) continue; 5343 #if defined(PETSC_USE_DEBUG) 5344 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5345 #endif 5346 else { 5347 if (mat->was_assembled) { 5348 if (!aij->colmap) { 5349 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5350 } 5351 #if defined (PETSC_USE_CTABLE) 5352 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5353 col--; 5354 #else 5355 col = aij->colmap[in[j]] - 1; 5356 #endif 5357 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5358 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5359 col = in[j]; 5360 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5361 B = aij->B; 5362 b = (Mat_SeqAIJ*)B->data; 5363 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5364 rp2 = bj + bi[row]; 5365 ap2 = ba + bi[row]; 5366 rmax2 = bimax[row]; 5367 nrow2 = bilen[row]; 5368 low2 = 0; 5369 high2 = nrow2; 5370 bm = aij->B->rmap->n; 5371 ba = b->a; 5372 } 5373 } else col = in[j]; 5374 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5375 } 5376 } 5377 } else { 5378 if (!aij->donotstash) { 5379 if (roworiented) { 5380 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5381 } else { 5382 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5383 } 5384 } 5385 } 5386 }} 5387 PetscFunctionReturnVoid(); 5388 } 5389 EXTERN_C_END 5390 5391