#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

  Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the diagonal and off-diagonal counts */
      ierr =
PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap->n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap->n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap->n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global 
   column number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it, it is not scalable (each processor
   has an order N integer array but is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode
MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 276 { 277 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 278 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 279 PetscErrorCode ierr; 280 PetscInt l,*garray = mat->garray,diag; 281 282 PetscFunctionBegin; 283 /* code only works for square matrices A */ 284 285 /* find size of row to the left of the diagonal part */ 286 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 287 row = row - diag; 288 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 289 if (garray[b->j[b->i[row]+l]] > diag) break; 290 } 291 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 292 293 /* diagonal part */ 294 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 295 296 /* right of diagonal part */ 297 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 298 PetscFunctionReturn(0); 299 } 300 301 #undef __FUNCT__ 302 #define __FUNCT__ "MatSetValues_MPIAIJ" 303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 306 PetscScalar value; 307 PetscErrorCode ierr; 308 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 309 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 310 PetscBool roworiented = aij->roworiented; 311 312 /* Some Variables required in the macro */ 313 Mat A = aij->A; 314 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 315 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 316 MatScalar *aa = a->a; 317 PetscBool ignorezeroentries = a->ignorezeroentries; 318 Mat B = aij->B; 319 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 320 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 321 MatScalar *ba = b->a; 322 323 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 324 PetscInt nonew = a->nonew; 325 MatScalar *ap1,*ap2; 326 327 PetscFunctionBegin; 328 if (v) PetscValidScalarPointer(v,6); 329 for (i=0; i<m; i++) { 330 if (im[i] < 0) continue; 331 #if defined(PETSC_USE_DEBUG) 332 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 333 #endif 334 if (im[i] >= rstart && im[i] < rend) { 335 row = im[i] - rstart; 336 lastcol1 = -1; 337 rp1 = aj + ai[row]; 338 ap1 = aa + ai[row]; 339 rmax1 = aimax[row]; 340 nrow1 = ailen[row]; 341 low1 = 0; 342 high1 = nrow1; 343 lastcol2 = -1; 344 rp2 = bj + bi[row]; 345 ap2 = ba + bi[row]; 346 rmax2 = bimax[row]; 347 nrow2 = bilen[row]; 348 low2 = 0; 349 high2 = nrow2; 350 351 for (j=0; j<n; j++) { 352 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 353 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 354 if (in[j] >= cstart && in[j] < cend){ 355 col = in[j] - cstart; 356 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 357 } else if (in[j] < 0) continue; 358 #if defined(PETSC_USE_DEBUG) 359 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 360 #endif 361 else { 362 if (mat->was_assembled) { 363 if (!aij->colmap) { 364 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 365 } 366 #if defined (PETSC_USE_CTABLE) 367 ierr = 
PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 368 col--; 369 #else 370 col = aij->colmap[in[j]] - 1; 371 #endif 372 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 373 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 374 col = in[j]; 375 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 376 B = aij->B; 377 b = (Mat_SeqAIJ*)B->data; 378 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 379 rp2 = bj + bi[row]; 380 ap2 = ba + bi[row]; 381 rmax2 = bimax[row]; 382 nrow2 = bilen[row]; 383 low2 = 0; 384 high2 = nrow2; 385 bm = aij->B->rmap->n; 386 ba = b->a; 387 } 388 } else col = in[j]; 389 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 390 } 391 } 392 } else { 393 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 394 if (!aij->donotstash) { 395 if (roworiented) { 396 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 397 } else { 398 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 399 } 400 } 401 } 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatGetValues_MPIAIJ" 408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 413 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 414 415 PetscFunctionBegin; 416 for (i=0; i<m; i++) { 417 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 418 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 419 if (idxm[i] >= rstart && idxm[i] < rend) { 420 row = idxm[i] - rstart; 421 for (j=0; j<n; j++) { 422 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 423 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 424 if (idxn[j] >= cstart && idxn[j] < cend){ 425 col = idxn[j] - cstart; 426 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 427 } else { 428 if (!aij->colmap) { 429 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 430 } 431 #if defined (PETSC_USE_CTABLE) 432 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 433 col --; 434 #else 435 col = aij->colmap[idxn[j]] - 1; 436 #endif 437 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 438 else { 439 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 440 } 441 } 442 } 443 } else { 444 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 445 } 446 } 447 PetscFunctionReturn(0); 448 } 449 450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash || mat->nooffprocentries) { 463 
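    /* nothing was stashed for off-process rows (the user either promised there are no
       off-process entries or asked for them to be ignored), so there is no stash
       communication to begin */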
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
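     Roughly: DisAssemble_MPIAIJ() rebuilds the off-diagonal block B with global column
     indices so that new off-process nonzeros can be inserted. If any process did that
     during MatSetValues(), every process must follow suit here, since the rebuild of the
     scatter in MatSetUpMultiply_MPIAIJ() below is collective and has to be entered by all
     processes together.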
*/ 516 /* 517 if nonzero structure of submatrix B cannot change then we know that 518 no processor disassembled thus we can skip this stuff 519 */ 520 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 521 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 522 if (mat->was_assembled && !other_disassembled) { 523 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 524 } 525 } 526 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 527 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 528 } 529 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 530 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 531 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 532 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 533 534 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 535 aij->rowvalues = 0; 536 537 /* used by MatAXPY() */ 538 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 539 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 540 541 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 542 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 549 { 550 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 555 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatZeroRows_MPIAIJ" 561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 562 { 563 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 564 PetscErrorCode ierr; 565 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 566 PetscInt i,*owners = A->rmap->range; 567 PetscInt *nprocs,j,idx,nsends,row; 568 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 569 PetscInt *rvalues,count,base,slen,*source; 570 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 571 MPI_Comm comm = ((PetscObject)A)->comm; 572 MPI_Request *send_waits,*recv_waits; 573 MPI_Status recv_status,*send_status; 574 const PetscScalar *xx; 575 PetscScalar *bb; 576 #if defined(PETSC_DEBUG) 577 PetscBool found = PETSC_FALSE; 578 #endif 579 580 PetscFunctionBegin; 581 /* first count number of contributors to each processor */ 582 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 583 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 584 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 585 j = 0; 586 for (i=0; i<N; i++) { 587 if (lastidx > (idx = rows[i])) j = 0; 588 lastidx = idx; 589 for (; j<size; j++) { 590 if (idx >= owners[j] && idx < owners[j+1]) { 591 nprocs[2*j]++; 592 nprocs[2*j+1] = 1; 593 owner[i] = j; 594 #if defined(PETSC_DEBUG) 595 found = PETSC_TRUE; 596 #endif 597 break; 598 } 599 } 600 #if defined(PETSC_DEBUG) 601 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 602 found = PETSC_FALSE; 603 #endif 604 } 605 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 606 607 if (A->nooffproczerorows) { 608 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 609 
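      /* all rows being zeroed are local, so this process at most exchanges rows with
         itself; the collective PetscMaxSum() sizing of the receive buffers is skipped */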
nrecvs = nsends; 610 nmax = N; 611 } else { 612 /* inform other processors of number of messages and max length*/ 613 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 614 } 615 616 /* post receives: */ 617 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 618 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 619 for (i=0; i<nrecvs; i++) { 620 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 621 } 622 623 /* do sends: 624 1) starts[i] gives the starting index in svalues for stuff going to 625 the ith processor 626 */ 627 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 628 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 629 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 630 starts[0] = 0; 631 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 632 for (i=0; i<N; i++) { 633 svalues[starts[owner[i]]++] = rows[i]; 634 } 635 636 starts[0] = 0; 637 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 638 count = 0; 639 for (i=0; i<size; i++) { 640 if (nprocs[2*i+1]) { 641 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 642 } 643 } 644 ierr = PetscFree(starts);CHKERRQ(ierr); 645 646 base = owners[rank]; 647 648 /* wait on receives */ 649 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 650 count = nrecvs; slen = 0; 651 while (count) { 652 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 653 /* unpack receives into our local space */ 654 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 655 source[imdex] = recv_status.MPI_SOURCE; 656 lens[imdex] = n; 657 slen += n; 658 count--; 659 } 660 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 661 662 /* move the data into the send scatter */ 663 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 664 count = 0; 665 for (i=0; i<nrecvs; i++) { 666 values = rvalues + i*nmax; 667 for (j=0; j<lens[i]; j++) { 668 lrows[count++] = values[j] - base; 669 } 670 } 671 ierr = PetscFree(rvalues);CHKERRQ(ierr); 672 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 673 ierr = PetscFree(owner);CHKERRQ(ierr); 674 ierr = PetscFree(nprocs);CHKERRQ(ierr); 675 676 /* fix right hand side if needed */ 677 if (x && b) { 678 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 679 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 680 for (i=0; i<N; i++) { 681 bb[lrows[i]] = diag*xx[lrows[i]]; 682 } 683 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 684 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 685 } 686 /* 687 Zero the required rows. If the "diagonal block" of the matrix 688 is square and the user wishes to set the diagonal we use separate 689 code so that MatSetValues() is not called for each diagonal allocating 690 new memory, thus calling lots of mallocs and slowing things down. 
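      As an illustrative sketch (not code from this file; nbc and bcrows are hypothetical
      names for the number of boundary rows and their global indices), a caller imposing
      Dirichlet conditions might use

         ierr = MatZeroRows(A,nbc,bcrows,1.0,x,b);CHKERRQ(ierr);

      where x holds the prescribed boundary values; the loop above then fixes up the
      right hand side as b[row] = diag*x[row] before the rows themselves are zeroed.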
691 692 */ 693 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 694 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 695 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 696 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 697 } else if (diag != 0.0) { 698 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 699 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 700 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 701 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 702 } 703 for (i = 0; i < slen; i++) { 704 row = lrows[i] + rstart; 705 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 706 } 707 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 708 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 709 } else { 710 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 711 } 712 ierr = PetscFree(lrows);CHKERRQ(ierr); 713 714 /* wait on sends */ 715 if (nsends) { 716 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 717 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 718 ierr = PetscFree(send_status);CHKERRQ(ierr); 719 } 720 ierr = PetscFree(send_waits);CHKERRQ(ierr); 721 ierr = PetscFree(svalues);CHKERRQ(ierr); 722 723 PetscFunctionReturn(0); 724 } 725 726 #undef __FUNCT__ 727 #define __FUNCT__ "MatMult_MPIAIJ" 728 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 729 { 730 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 731 PetscErrorCode ierr; 732 PetscInt nt; 733 734 PetscFunctionBegin; 735 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 736 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 737 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 738 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 739 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 740 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 746 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 747 { 748 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 749 PetscErrorCode ierr; 750 751 PetscFunctionBegin; 752 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 753 PetscFunctionReturn(0); 754 } 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatMultAdd_MPIAIJ" 758 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 759 { 760 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 761 PetscErrorCode ierr; 762 763 PetscFunctionBegin; 764 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 765 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 766 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 767 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 768 PetscFunctionReturn(0); 769 } 770 771 #undef __FUNCT__ 772 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 773 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 774 { 775 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 776 PetscErrorCode ierr; 777 PetscBool merged; 778 779 PetscFunctionBegin; 780 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 781 /* do nondiagonal part */ 782 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 783 if (!merged) { 784 
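    /* a->lvec already holds B^T*x; start the reverse scatter first so that its
       communication overlaps the local A^T*x computed below, and the ADD_VALUES reverse
       scatter accumulates the off-process contributions into yy */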
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
diagonal block 869 */ 870 #undef __FUNCT__ 871 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 872 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 873 { 874 PetscErrorCode ierr; 875 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 876 877 PetscFunctionBegin; 878 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 879 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 880 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 881 PetscFunctionReturn(0); 882 } 883 884 #undef __FUNCT__ 885 #define __FUNCT__ "MatScale_MPIAIJ" 886 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 887 { 888 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 889 PetscErrorCode ierr; 890 891 PetscFunctionBegin; 892 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 893 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 894 PetscFunctionReturn(0); 895 } 896 897 #undef __FUNCT__ 898 #define __FUNCT__ "MatDestroy_MPIAIJ" 899 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 900 { 901 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 #if defined(PETSC_USE_LOG) 906 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 907 #endif 908 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 909 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 910 if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);} 911 if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);} 912 #if defined (PETSC_USE_CTABLE) 913 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 914 #else 915 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 916 #endif 917 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 918 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 919 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 920 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 921 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 922 ierr = PetscFree(aij);CHKERRQ(ierr); 923 924 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 925 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 926 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 927 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 928 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 929 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 930 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 931 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 932 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 933 PetscFunctionReturn(0); 934 } 935 936 #undef __FUNCT__ 937 #define __FUNCT__ "MatView_MPIAIJ_Binary" 938 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 939 { 940 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 941 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 942 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 943 PetscErrorCode ierr; 944 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 945 int fd; 946 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 947 PetscInt 
                 nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr =
MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1025 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1026 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1027 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1028 } 1029 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1030 } else { 1031 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1032 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1033 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1034 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1035 } 1036 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1037 1038 /* load up the local column values */ 1039 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1040 cnt = 0; 1041 for (i=0; i<mat->rmap->n; i++) { 1042 for (j=B->i[i]; j<B->i[i+1]; j++) { 1043 if ( garray[B->j[j]] > cstart) break; 1044 column_values[cnt++] = B->a[j]; 1045 } 1046 for (k=A->i[i]; k<A->i[i+1]; k++) { 1047 column_values[cnt++] = A->a[k]; 1048 } 1049 for (; j<B->i[i+1]; j++) { 1050 column_values[cnt++] = B->a[j]; 1051 } 1052 } 1053 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1054 1055 /* store the column values to the file */ 1056 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1057 if (!rank) { 1058 MPI_Status status; 1059 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1060 for (i=1; i<size; i++) { 1061 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1062 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1063 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1064 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1065 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1066 } 1067 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1068 } else { 1069 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1070 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1071 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1072 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1073 } 1074 ierr = PetscFree(column_values);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1080 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1081 { 1082 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1083 PetscErrorCode ierr; 1084 PetscMPIInt rank = aij->rank,size = aij->size; 1085 PetscBool isdraw,iascii,isbinary; 1086 PetscViewer sviewer; 1087 PetscViewerFormat format; 1088 1089 PetscFunctionBegin; 1090 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1091 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1092 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1093 if (iascii) { 1094 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1095 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1096 MatInfo info; 1097 PetscBool inodes; 1098 1099 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1100 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1101 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1102 if (!inodes) { 1103 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1104 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1105 } else { 1106 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1107 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1108 } 1109 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1110 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1111 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1112 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1113 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1114 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1115 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1118 PetscInt inodecount,inodelimit,*inodes; 1119 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1120 if (inodes) { 1121 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1122 } else { 1123 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1124 } 1125 PetscFunctionReturn(0); 1126 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1127 PetscFunctionReturn(0); 1128 } 1129 } else if (isbinary) { 1130 if (size == 1) { 1131 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1132 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1133 } else { 1134 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1135 } 1136 PetscFunctionReturn(0); 1137 } else if (isdraw) { 1138 PetscDraw draw; 1139 PetscBool isnull; 1140 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1141 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1142 } 1143 1144 if (size == 1) { 1145 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1146 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1147 } else { 1148 /* assemble the entire matrix onto first processor. 
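          This path is only intended for viewing small matrices (note the guard below that
          refuses more than 1024 rows unless -mat_ascii_output_large is given): each process
          copies its A and B parts into a temporary MATMPIAIJ whose rows all live on process 0,
          and process 0 then views that gathered block through the singleton viewer.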
*/ 1149 Mat A; 1150 Mat_SeqAIJ *Aloc; 1151 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1152 MatScalar *a; 1153 1154 if (mat->rmap->N > 1024) { 1155 PetscBool flg = PETSC_FALSE; 1156 1157 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1158 if (!flg) { 1159 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1160 } 1161 } 1162 1163 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1164 if (!rank) { 1165 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1166 } else { 1167 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1168 } 1169 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1170 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1171 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1172 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1173 1174 /* copy over the A part */ 1175 Aloc = (Mat_SeqAIJ*)aij->A->data; 1176 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1177 row = mat->rmap->rstart; 1178 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1179 for (i=0; i<m; i++) { 1180 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1181 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1182 } 1183 aj = Aloc->j; 1184 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1185 1186 /* copy over the B part */ 1187 Aloc = (Mat_SeqAIJ*)aij->B->data; 1188 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1189 row = mat->rmap->rstart; 1190 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1191 ct = cols; 1192 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1193 for (i=0; i<m; i++) { 1194 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1195 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1196 } 1197 ierr = PetscFree(ct);CHKERRQ(ierr); 1198 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1199 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1200 /* 1201 Everyone has to call to draw the matrix since the graphics waits are 1202 synchronized across all processors that share the PetscDraw object 1203 */ 1204 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1205 if (!rank) { 1206 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1207 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1208 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1209 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1210 } 1211 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1212 ierr = MatDestroy(A);CHKERRQ(ierr); 1213 } 1214 PetscFunctionReturn(0); 1215 } 1216 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "MatView_MPIAIJ" 1219 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1220 { 1221 PetscErrorCode ierr; 1222 PetscBool iascii,isdraw,issocket,isbinary; 1223 1224 PetscFunctionBegin; 1225 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1226 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1227 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1228 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1229 if (iascii || isdraw || isbinary || issocket) { 1230 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1231 } else { 1232 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1233 } 1234 PetscFunctionReturn(0); 1235 } 1236 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "MatSOR_MPIAIJ" 1239 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1240 { 1241 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1242 PetscErrorCode ierr; 1243 Vec bb1 = 0; 1244 PetscBool hasop; 1245 1246 PetscFunctionBegin; 1247 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1248 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1249 } 1250 1251 if (flag == SOR_APPLY_UPPER) { 1252 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1257 if (flag & SOR_ZERO_INITIAL_GUESS) { 1258 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1259 its--; 1260 } 1261 1262 while (its--) { 1263 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265 1266 /* update rhs: bb1 = bb - B*x */ 1267 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1268 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1269 1270 /* local sweep */ 1271 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1272 } 1273 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1274 if (flag & SOR_ZERO_INITIAL_GUESS) { 1275 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1276 its--; 1277 } 1278 while (its--) { 1279 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1280 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1281 1282 /* update rhs: bb1 = bb - B*x */ 1283 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1284 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1285 1286 /* local sweep */ 1287 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1288 } 1289 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1290 if (flag & SOR_ZERO_INITIAL_GUESS) { 1291 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1292 its--; 1293 } 1294 while (its--) { 1295 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1296 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1297 1298 /* update rhs: bb1 = bb - B*x */ 1299 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1300 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1301 1302 /* local sweep */ 1303 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1304 } 1305 } else if (flag & SOR_EISENSTAT) { 1306 Vec xx1; 1307 1308 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1309 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1310 1311 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1312 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1313 if (!mat->diag) { 1314 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1315 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1316 } 1317 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1318 if (hasop) { 1319 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1320 } else { 1321 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1322 } 1323 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1324 1325 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1326 1327 /* local sweep */ 1328 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1329 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1330 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1331 } else { 1332 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1333 } 1334 1335 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatPermute_MPIAIJ" 1341 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1342 { 1343 MPI_Comm comm,pcomm; 1344 PetscInt first,local_size,nrows; 1345 const PetscInt *rows; 1346 PetscMPIInt size; 1347 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1348 PetscErrorCode ierr; 1349 1350 PetscFunctionBegin; 1351 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1352 /* make a collective version of 'rowp' */ 1353 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1354 if (pcomm==comm) { 1355 crowp = rowp; 1356 } else { 1357 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1358 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1359 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 1360 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1361 } 1362 /* collect the global row permutation and invert it */ 1363 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1364 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1365 if (pcomm!=comm) { 1366 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1367 } 1368 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1369 /* get the local target indices */ 1370 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1371 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1372 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1373 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1374 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1375 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1376 /* the column permutation is so much easier; 1377 make a local version of 'colp' and invert it */ 1378 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1379 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1380 if (size==1) { 1381 lcolp = colp; 1382 } else { 1383 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1384 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1385 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 1386 } 1387 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1388 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1389 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1390 if (size>1) { 1391 ierr = 
ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1392 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1393 } 1394 /* now we just get the submatrix */ 1395 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1396 /* clean up */ 1397 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1398 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1399 PetscFunctionReturn(0); 1400 } 1401 1402 #undef __FUNCT__ 1403 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1404 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1405 { 1406 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1407 Mat A = mat->A,B = mat->B; 1408 PetscErrorCode ierr; 1409 PetscReal isend[5],irecv[5]; 1410 1411 PetscFunctionBegin; 1412 info->block_size = 1.0; 1413 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1414 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1415 isend[3] = info->memory; isend[4] = info->mallocs; 1416 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1417 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1418 isend[3] += info->memory; isend[4] += info->mallocs; 1419 if (flag == MAT_LOCAL) { 1420 info->nz_used = isend[0]; 1421 info->nz_allocated = isend[1]; 1422 info->nz_unneeded = isend[2]; 1423 info->memory = isend[3]; 1424 info->mallocs = isend[4]; 1425 } else if (flag == MAT_GLOBAL_MAX) { 1426 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1427 info->nz_used = irecv[0]; 1428 info->nz_allocated = irecv[1]; 1429 info->nz_unneeded = irecv[2]; 1430 info->memory = irecv[3]; 1431 info->mallocs = irecv[4]; 1432 } else if (flag == MAT_GLOBAL_SUM) { 1433 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1434 info->nz_used = irecv[0]; 1435 info->nz_allocated = irecv[1]; 1436 info->nz_unneeded = irecv[2]; 1437 info->memory = irecv[3]; 1438 info->mallocs = irecv[4]; 1439 } 1440 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1441 info->fill_ratio_needed = 0; 1442 info->factor_mallocs = 0; 1443 1444 PetscFunctionReturn(0); 1445 } 1446 1447 #undef __FUNCT__ 1448 #define __FUNCT__ "MatSetOption_MPIAIJ" 1449 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1450 { 1451 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1452 PetscErrorCode ierr; 1453 1454 PetscFunctionBegin; 1455 switch (op) { 1456 case MAT_NEW_NONZERO_LOCATIONS: 1457 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1458 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1459 case MAT_KEEP_NONZERO_PATTERN: 1460 case MAT_NEW_NONZERO_LOCATION_ERR: 1461 case MAT_USE_INODES: 1462 case MAT_IGNORE_ZERO_ENTRIES: 1463 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1464 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1465 break; 1466 case MAT_ROW_ORIENTED: 1467 a->roworiented = flg; 1468 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1469 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1470 break; 1471 case MAT_NEW_DIAGONALS: 1472 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1473 break; 1474 case MAT_IGNORE_OFF_PROC_ENTRIES: 1475 a->donotstash = PETSC_TRUE; 1476 break; 1477 case MAT_SPD: 1478 A->spd_set = PETSC_TRUE; 1479 A->spd = flg; 1480 if (flg) { 1481 A->symmetric = PETSC_TRUE; 1482 A->structurally_symmetric = PETSC_TRUE; 1483 A->symmetric_set = PETSC_TRUE; 1484 A->structurally_symmetric_set = PETSC_TRUE; 1485 } 1486 break; 1487 case MAT_SYMMETRIC: 1488 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1489 break; 1490 case 
MAT_STRUCTURALLY_SYMMETRIC: 1491 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1492 break; 1493 case MAT_HERMITIAN: 1494 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1495 break; 1496 case MAT_SYMMETRY_ETERNAL: 1497 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1498 break; 1499 default: 1500 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1501 } 1502 PetscFunctionReturn(0); 1503 } 1504 1505 #undef __FUNCT__ 1506 #define __FUNCT__ "MatGetRow_MPIAIJ" 1507 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1508 { 1509 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1510 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1511 PetscErrorCode ierr; 1512 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1513 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1514 PetscInt *cmap,*idx_p; 1515 1516 PetscFunctionBegin; 1517 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1518 mat->getrowactive = PETSC_TRUE; 1519 1520 if (!mat->rowvalues && (idx || v)) { 1521 /* 1522 allocate enough space to hold information from the longest row. 1523 */ 1524 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1525 PetscInt max = 1,tmp; 1526 for (i=0; i<matin->rmap->n; i++) { 1527 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1528 if (max < tmp) { max = tmp; } 1529 } 1530 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1531 } 1532 1533 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1534 lrow = row - rstart; 1535 1536 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1537 if (!v) {pvA = 0; pvB = 0;} 1538 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1539 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1540 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1541 nztot = nzA + nzB; 1542 1543 cmap = mat->garray; 1544 if (v || idx) { 1545 if (nztot) { 1546 /* Sort by increasing column numbers, assuming A and B already sorted */ 1547 PetscInt imark = -1; 1548 if (v) { 1549 *v = v_p = mat->rowvalues; 1550 for (i=0; i<nzB; i++) { 1551 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1552 else break; 1553 } 1554 imark = i; 1555 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1556 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1557 } 1558 if (idx) { 1559 *idx = idx_p = mat->rowindices; 1560 if (imark > -1) { 1561 for (i=0; i<imark; i++) { 1562 idx_p[i] = cmap[cworkB[i]]; 1563 } 1564 } else { 1565 for (i=0; i<nzB; i++) { 1566 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1567 else break; 1568 } 1569 imark = i; 1570 } 1571 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1572 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1573 } 1574 } else { 1575 if (idx) *idx = 0; 1576 if (v) *v = 0; 1577 } 1578 } 1579 *nz = nztot; 1580 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1581 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1587 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1588 { 1589 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1590 1591 PetscFunctionBegin; 1592 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must 
be called first"); 1593 aij->getrowactive = PETSC_FALSE; 1594 PetscFunctionReturn(0); 1595 } 1596 1597 #undef __FUNCT__ 1598 #define __FUNCT__ "MatNorm_MPIAIJ" 1599 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1600 { 1601 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1602 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1603 PetscErrorCode ierr; 1604 PetscInt i,j,cstart = mat->cmap->rstart; 1605 PetscReal sum = 0.0; 1606 MatScalar *v; 1607 1608 PetscFunctionBegin; 1609 if (aij->size == 1) { 1610 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1611 } else { 1612 if (type == NORM_FROBENIUS) { 1613 v = amat->a; 1614 for (i=0; i<amat->nz; i++) { 1615 #if defined(PETSC_USE_COMPLEX) 1616 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1617 #else 1618 sum += (*v)*(*v); v++; 1619 #endif 1620 } 1621 v = bmat->a; 1622 for (i=0; i<bmat->nz; i++) { 1623 #if defined(PETSC_USE_COMPLEX) 1624 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1625 #else 1626 sum += (*v)*(*v); v++; 1627 #endif 1628 } 1629 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1630 *norm = sqrt(*norm); 1631 } else if (type == NORM_1) { /* max column norm */ 1632 PetscReal *tmp,*tmp2; 1633 PetscInt *jj,*garray = aij->garray; 1634 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1635 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1636 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1637 *norm = 0.0; 1638 v = amat->a; jj = amat->j; 1639 for (j=0; j<amat->nz; j++) { 1640 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1641 } 1642 v = bmat->a; jj = bmat->j; 1643 for (j=0; j<bmat->nz; j++) { 1644 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1645 } 1646 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1647 for (j=0; j<mat->cmap->N; j++) { 1648 if (tmp2[j] > *norm) *norm = tmp2[j]; 1649 } 1650 ierr = PetscFree(tmp);CHKERRQ(ierr); 1651 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1652 } else if (type == NORM_INFINITY) { /* max row norm */ 1653 PetscReal ntemp = 0.0; 1654 for (j=0; j<aij->A->rmap->n; j++) { 1655 v = amat->a + amat->i[j]; 1656 sum = 0.0; 1657 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1658 sum += PetscAbsScalar(*v); v++; 1659 } 1660 v = bmat->a + bmat->i[j]; 1661 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1662 sum += PetscAbsScalar(*v); v++; 1663 } 1664 if (sum > ntemp) ntemp = sum; 1665 } 1666 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1667 } else { 1668 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1669 } 1670 } 1671 PetscFunctionReturn(0); 1672 } 1673 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatTranspose_MPIAIJ" 1676 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1677 { 1678 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1679 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1680 PetscErrorCode ierr; 1681 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1682 PetscInt cstart=A->cmap->rstart,ncol; 1683 Mat B; 1684 MatScalar *array; 1685 1686 PetscFunctionBegin; 1687 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1688 1689 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1690 ai = Aloc->i; aj = Aloc->j; 1691 bi = Bloc->i; bj = Bloc->j; 1692 if (reuse 
== MAT_INITIAL_MATRIX || *matout == A) { 1693 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1694 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1695 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1696 for (i=0; i<ai[ma]; i++){ 1697 d_nnz[aj[i]] ++; 1698 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1699 } 1700 1701 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1702 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1703 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1704 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1705 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1706 } else { 1707 B = *matout; 1708 } 1709 1710 /* copy over the A part */ 1711 array = Aloc->a; 1712 row = A->rmap->rstart; 1713 for (i=0; i<ma; i++) { 1714 ncol = ai[i+1]-ai[i]; 1715 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1716 row++; array += ncol; aj += ncol; 1717 } 1718 aj = Aloc->j; 1719 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1720 1721 /* copy over the B part */ 1722 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1723 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1724 array = Bloc->a; 1725 row = A->rmap->rstart; 1726 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1727 cols_tmp = cols; 1728 for (i=0; i<mb; i++) { 1729 ncol = bi[i+1]-bi[i]; 1730 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1731 row++; array += ncol; cols_tmp += ncol; 1732 } 1733 ierr = PetscFree(cols);CHKERRQ(ierr); 1734 1735 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1736 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1737 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1738 *matout = B; 1739 } else { 1740 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1741 } 1742 PetscFunctionReturn(0); 1743 } 1744 1745 #undef __FUNCT__ 1746 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1747 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1748 { 1749 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1750 Mat a = aij->A,b = aij->B; 1751 PetscErrorCode ierr; 1752 PetscInt s1,s2,s3; 1753 1754 PetscFunctionBegin; 1755 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1756 if (rr) { 1757 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1758 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1759 /* Overlap communication with computation. 
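       The forward scatter of rr into aij->lvec is begun here and finished only after the diagonal block has been scaled, so the communication overlaps the local scaling work.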
*/ 1760 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1761 } 1762 if (ll) { 1763 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1764 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1765 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1766 } 1767 /* scale the diagonal block */ 1768 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1769 1770 if (rr) { 1771 /* Do a scatter end and then right scale the off-diagonal block */ 1772 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1773 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1774 } 1775 1776 PetscFunctionReturn(0); 1777 } 1778 1779 #undef __FUNCT__ 1780 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1781 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1782 { 1783 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1784 PetscErrorCode ierr; 1785 1786 PetscFunctionBegin; 1787 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1788 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1789 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1790 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1791 PetscFunctionReturn(0); 1792 } 1793 #undef __FUNCT__ 1794 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1795 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1796 { 1797 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1798 PetscErrorCode ierr; 1799 1800 PetscFunctionBegin; 1801 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1802 PetscFunctionReturn(0); 1803 } 1804 1805 #undef __FUNCT__ 1806 #define __FUNCT__ "MatEqual_MPIAIJ" 1807 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 1808 { 1809 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1810 Mat a,b,c,d; 1811 PetscBool flg; 1812 PetscErrorCode ierr; 1813 1814 PetscFunctionBegin; 1815 a = matA->A; b = matA->B; 1816 c = matB->A; d = matB->B; 1817 1818 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1819 if (flg) { 1820 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1821 } 1822 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1823 PetscFunctionReturn(0); 1824 } 1825 1826 #undef __FUNCT__ 1827 #define __FUNCT__ "MatCopy_MPIAIJ" 1828 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1829 { 1830 PetscErrorCode ierr; 1831 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1832 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1833 1834 PetscFunctionBegin; 1835 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1836 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1837 /* because of the column compression in the off-processor part of the matrix a->B, 1838 the number of columns in a->B and b->B may be different, hence we cannot call 1839 the MatCopy() directly on the two parts. 
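       (The off-diagonal block a->B stores only those global columns that contain a nonzero on this process, so two matrices with the same global nonzero pattern may still have B blocks with different numbers of columns.)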
If need be, we can provide a more 1840 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1841 then copying the submatrices */ 1842 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1843 } else { 1844 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1845 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1846 } 1847 PetscFunctionReturn(0); 1848 } 1849 1850 #undef __FUNCT__ 1851 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1852 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1853 { 1854 PetscErrorCode ierr; 1855 1856 PetscFunctionBegin; 1857 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1858 PetscFunctionReturn(0); 1859 } 1860 1861 #undef __FUNCT__ 1862 #define __FUNCT__ "MatAXPY_MPIAIJ" 1863 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1864 { 1865 PetscErrorCode ierr; 1866 PetscInt i; 1867 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1868 PetscBLASInt bnz,one=1; 1869 Mat_SeqAIJ *x,*y; 1870 1871 PetscFunctionBegin; 1872 if (str == SAME_NONZERO_PATTERN) { 1873 PetscScalar alpha = a; 1874 x = (Mat_SeqAIJ *)xx->A->data; 1875 y = (Mat_SeqAIJ *)yy->A->data; 1876 bnz = PetscBLASIntCast(x->nz); 1877 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1878 x = (Mat_SeqAIJ *)xx->B->data; 1879 y = (Mat_SeqAIJ *)yy->B->data; 1880 bnz = PetscBLASIntCast(x->nz); 1881 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1882 } else if (str == SUBSET_NONZERO_PATTERN) { 1883 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1884 1885 x = (Mat_SeqAIJ *)xx->B->data; 1886 y = (Mat_SeqAIJ *)yy->B->data; 1887 if (y->xtoy && y->XtoY != xx->B) { 1888 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1889 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1890 } 1891 if (!y->xtoy) { /* get xtoy */ 1892 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1893 y->XtoY = xx->B; 1894 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1895 } 1896 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1897 } else { 1898 Mat B; 1899 PetscInt *nnz_d,*nnz_o; 1900 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 1901 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 1902 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 1903 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1904 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1905 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 1906 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1907 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 1908 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 1909 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1910 ierr = MatHeaderReplace(Y,B); 1911 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1912 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1918 1919 #undef __FUNCT__ 1920 #define __FUNCT__ "MatConjugate_MPIAIJ" 1921 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1922 { 1923 #if defined(PETSC_USE_COMPLEX) 1924 PetscErrorCode ierr; 1925 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1926 1927 PetscFunctionBegin; 1928 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1929 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1930 #else 1931 PetscFunctionBegin; 1932 
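  /* conjugation is a no-op for real scalars, so there is nothing to do here */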
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat
B,Mat A, const MatFactorInfo *info) 2025 { 2026 PetscFunctionBegin; 2027 PetscFunctionReturn(0); 2028 } 2029 2030 #undef __FUNCT__ 2031 #define __FUNCT__ "MatSolve_MPIAIJ" 2032 /* 2033 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2034 */ 2035 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2036 { 2037 namespace graph_dist = boost::graph::distributed; 2038 2039 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2040 lgraph_type* lgraph_p; 2041 PetscContainer c; 2042 PetscErrorCode ierr; 2043 2044 PetscFunctionBegin; 2045 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2046 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2047 ierr = VecCopy(b, x);CHKERRQ(ierr); 2048 2049 PetscScalar* array_x; 2050 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2051 PetscInt sx; 2052 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2053 2054 PetscScalar* array_b; 2055 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2056 PetscInt sb; 2057 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2058 2059 lgraph_type& level_graph = *lgraph_p; 2060 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2061 2062 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2063 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2064 ref_x(array_x, boost::extents[num_vertices(graph)]); 2065 2066 typedef boost::iterator_property_map<array_ref_type::iterator, 2067 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2068 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2069 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2070 2071 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2072 2073 PetscFunctionReturn(0); 2074 } 2075 #endif 2076 2077 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2078 PetscInt nzlocal,nsends,nrecvs; 2079 PetscMPIInt *send_rank,*recv_rank; 2080 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2081 PetscScalar *sbuf_a,**rbuf_a; 2082 PetscErrorCode (*MatDestroy)(Mat); 2083 } Mat_Redundant; 2084 2085 #undef __FUNCT__ 2086 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2087 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2088 { 2089 PetscErrorCode ierr; 2090 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2091 PetscInt i; 2092 2093 PetscFunctionBegin; 2094 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2095 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2096 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2097 for (i=0; i<redund->nrecvs; i++){ 2098 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2099 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2100 } 2101 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2102 ierr = PetscFree(redund);CHKERRQ(ierr); 2103 PetscFunctionReturn(0); 2104 } 2105 2106 #undef __FUNCT__ 2107 #define __FUNCT__ "MatDestroy_MatRedundant" 2108 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2109 { 2110 PetscErrorCode ierr; 2111 PetscContainer container; 2112 Mat_Redundant *redund=PETSC_NULL; 2113 2114 PetscFunctionBegin; 2115 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2116 if (container) { 2117 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2118 } else { 2119 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2120 } 2121 A->ops->destroy = redund->MatDestroy; 2122 
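  /* put back the original destroy routine, detach the container, destroy the matrix itself, and let PetscContainerDestroy() free the Mat_Redundant buffers through PetscContainerDestroy_MatRedundant() */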
  ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 2174 2175 nsends = redund->nsends; 2176 nrecvs = redund->nrecvs; 2177 send_rank = redund->send_rank; 2178 recv_rank = redund->recv_rank; 2179 sbuf_nz = redund->sbuf_nz; 2180 rbuf_nz = redund->rbuf_nz; 2181 sbuf_j = redund->sbuf_j; 2182 sbuf_a = redund->sbuf_a; 2183 rbuf_j = redund->rbuf_j; 2184 rbuf_a = redund->rbuf_a; 2185 } 2186 2187 if (reuse == MAT_INITIAL_MATRIX){ 2188 PetscMPIInt subrank,subsize; 2189 PetscInt nleftover,np_subcomm; 2190 /* get the destination processors' id send_rank, nsends and nrecvs */ 2191 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2192 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2193 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2194 np_subcomm = size/nsubcomm; 2195 nleftover = size - nsubcomm*np_subcomm; 2196 nsends = 0; nrecvs = 0; 2197 for (i=0; i<size; i++){ /* i=rank*/ 2198 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2199 send_rank[nsends] = i; nsends++; 2200 recv_rank[nrecvs++] = i; 2201 } 2202 } 2203 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2204 i = size-nleftover-1; 2205 j = 0; 2206 while (j < nsubcomm - nleftover){ 2207 send_rank[nsends++] = i; 2208 i--; j++; 2209 } 2210 } 2211 2212 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2213 for (i=0; i<nleftover; i++){ 2214 recv_rank[nrecvs++] = size-nleftover+i; 2215 } 2216 } 2217 2218 /* allocate sbuf_j, sbuf_a */ 2219 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2220 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2221 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2222 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2223 2224 /* copy mat's local entries into the buffers */ 2225 if (reuse == MAT_INITIAL_MATRIX){ 2226 rownz_max = 0; 2227 rptr = sbuf_j; 2228 cols = sbuf_j + rend-rstart + 1; 2229 vals = sbuf_a; 2230 rptr[0] = 0; 2231 for (i=0; i<rend-rstart; i++){ 2232 row = i + rstart; 2233 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2234 ncols = nzA + nzB; 2235 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2236 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2237 /* load the column indices for this row into cols */ 2238 lwrite = 0; 2239 for (l=0; l<nzB; l++) { 2240 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2241 vals[lwrite] = aworkB[l]; 2242 cols[lwrite++] = ctmp; 2243 } 2244 } 2245 for (l=0; l<nzA; l++){ 2246 vals[lwrite] = aworkA[l]; 2247 cols[lwrite++] = cstart + cworkA[l]; 2248 } 2249 for (l=0; l<nzB; l++) { 2250 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2251 vals[lwrite] = aworkB[l]; 2252 cols[lwrite++] = ctmp; 2253 } 2254 } 2255 vals += ncols; 2256 cols += ncols; 2257 rptr[i+1] = rptr[i] + ncols; 2258 if (rownz_max < ncols) rownz_max = ncols; 2259 } 2260 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2261 } else { /* only copy matrix values into sbuf_a */ 2262 rptr = sbuf_j; 2263 vals = sbuf_a; 2264 rptr[0] = 0; 2265 for (i=0; i<rend-rstart; i++){ 2266 row = i + rstart; 2267 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2268 ncols = nzA + nzB; 2269 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2270 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2271 lwrite = 0; 2272 for (l=0; l<nzB; l++) { 2273 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2274 } 2275 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2276 for 
(l=0; l<nzB; l++) { 2277 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2278 } 2279 vals += ncols; 2280 rptr[i+1] = rptr[i] + ncols; 2281 } 2282 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2283 2284 /* send nzlocal to others, and recv other's nzlocal */ 2285 /*--------------------------------------------------*/ 2286 if (reuse == MAT_INITIAL_MATRIX){ 2287 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2288 s_waits2 = s_waits3 + nsends; 2289 s_waits1 = s_waits2 + nsends; 2290 r_waits1 = s_waits1 + nsends; 2291 r_waits2 = r_waits1 + nrecvs; 2292 r_waits3 = r_waits2 + nrecvs; 2293 } else { 2294 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2295 r_waits3 = s_waits3 + nsends; 2296 } 2297 2298 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2299 if (reuse == MAT_INITIAL_MATRIX){ 2300 /* get new tags to keep the communication clean */ 2301 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2302 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2303 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2304 2305 /* post receives of other's nzlocal */ 2306 for (i=0; i<nrecvs; i++){ 2307 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2308 } 2309 /* send nzlocal to others */ 2310 for (i=0; i<nsends; i++){ 2311 sbuf_nz[i] = nzlocal; 2312 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2313 } 2314 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2315 count = nrecvs; 2316 while (count) { 2317 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2318 recv_rank[imdex] = recv_status.MPI_SOURCE; 2319 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2320 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2321 2322 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2323 rbuf_nz[imdex] += i + 2; 2324 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2325 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2326 count--; 2327 } 2328 /* wait on sends of nzlocal */ 2329 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2330 /* send mat->i,j to others, and recv from other's */ 2331 /*------------------------------------------------*/ 2332 for (i=0; i<nsends; i++){ 2333 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2334 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2335 } 2336 /* wait on receives of mat->i,j */ 2337 /*------------------------------*/ 2338 count = nrecvs; 2339 while (count) { 2340 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2341 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2342 count--; 2343 } 2344 /* wait on sends of mat->i,j */ 2345 /*---------------------------*/ 2346 if (nsends) { 2347 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2348 } 2349 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2350 2351 /* post receives, send and receive mat->a */ 2352 
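/* (numerical values are re-sent on every call; the row/column index structure in sbuf_j/rbuf_j was exchanged above only when reuse == MAT_INITIAL_MATRIX) */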
/*----------------------------------------*/ 2353 for (imdex=0; imdex<nrecvs; imdex++) { 2354 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2355 } 2356 for (i=0; i<nsends; i++){ 2357 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2358 } 2359 count = nrecvs; 2360 while (count) { 2361 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2362 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2363 count--; 2364 } 2365 if (nsends) { 2366 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2367 } 2368 2369 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2370 2371 /* create redundant matrix */ 2372 /*-------------------------*/ 2373 if (reuse == MAT_INITIAL_MATRIX){ 2374 /* compute rownz_max for preallocation */ 2375 for (imdex=0; imdex<nrecvs; imdex++){ 2376 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2377 rptr = rbuf_j[imdex]; 2378 for (i=0; i<j; i++){ 2379 ncols = rptr[i+1] - rptr[i]; 2380 if (rownz_max < ncols) rownz_max = ncols; 2381 } 2382 } 2383 2384 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2385 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2386 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2387 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2388 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2389 } else { 2390 C = *matredundant; 2391 } 2392 2393 /* insert local matrix entries */ 2394 rptr = sbuf_j; 2395 cols = sbuf_j + rend-rstart + 1; 2396 vals = sbuf_a; 2397 for (i=0; i<rend-rstart; i++){ 2398 row = i + rstart; 2399 ncols = rptr[i+1] - rptr[i]; 2400 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2401 vals += ncols; 2402 cols += ncols; 2403 } 2404 /* insert received matrix entries */ 2405 for (imdex=0; imdex<nrecvs; imdex++){ 2406 rstart = rowrange[recv_rank[imdex]]; 2407 rend = rowrange[recv_rank[imdex]+1]; 2408 rptr = rbuf_j[imdex]; 2409 cols = rbuf_j[imdex] + rend-rstart + 1; 2410 vals = rbuf_a[imdex]; 2411 for (i=0; i<rend-rstart; i++){ 2412 row = i + rstart; 2413 ncols = rptr[i+1] - rptr[i]; 2414 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2415 vals += ncols; 2416 cols += ncols; 2417 } 2418 } 2419 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2420 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2421 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2422 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2423 if (reuse == MAT_INITIAL_MATRIX){ 2424 PetscContainer container; 2425 *matredundant = C; 2426 /* create a supporting struct and attach it to C for reuse */ 2427 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2428 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2429 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2430 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2431 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2432 2433 redund->nzlocal = nzlocal; 2434 redund->nsends = nsends; 2435 redund->nrecvs = nrecvs; 2436 redund->send_rank = send_rank; 2437 redund->recv_rank = recv_rank; 
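  /* ownership of the send/receive buffers passes to the Mat_Redundant struct; they are freed later by PetscContainerDestroy_MatRedundant() */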
2438 redund->sbuf_nz = sbuf_nz; 2439 redund->rbuf_nz = rbuf_nz; 2440 redund->sbuf_j = sbuf_j; 2441 redund->sbuf_a = sbuf_a; 2442 redund->rbuf_j = rbuf_j; 2443 redund->rbuf_a = rbuf_a; 2444 2445 redund->MatDestroy = C->ops->destroy; 2446 C->ops->destroy = MatDestroy_MatRedundant; 2447 } 2448 PetscFunctionReturn(0); 2449 } 2450 2451 #undef __FUNCT__ 2452 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2453 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2454 { 2455 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2456 PetscErrorCode ierr; 2457 PetscInt i,*idxb = 0; 2458 PetscScalar *va,*vb; 2459 Vec vtmp; 2460 2461 PetscFunctionBegin; 2462 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2463 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2464 if (idx) { 2465 for (i=0; i<A->rmap->n; i++) { 2466 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2467 } 2468 } 2469 2470 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2471 if (idx) { 2472 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2473 } 2474 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2475 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2476 2477 for (i=0; i<A->rmap->n; i++){ 2478 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2479 va[i] = vb[i]; 2480 if (idx) idx[i] = a->garray[idxb[i]]; 2481 } 2482 } 2483 2484 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2485 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2486 if (idxb) { 2487 ierr = PetscFree(idxb);CHKERRQ(ierr); 2488 } 2489 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2490 PetscFunctionReturn(0); 2491 } 2492 2493 #undef __FUNCT__ 2494 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2495 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2496 { 2497 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2498 PetscErrorCode ierr; 2499 PetscInt i,*idxb = 0; 2500 PetscScalar *va,*vb; 2501 Vec vtmp; 2502 2503 PetscFunctionBegin; 2504 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2505 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2506 if (idx) { 2507 for (i=0; i<A->cmap->n; i++) { 2508 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2509 } 2510 } 2511 2512 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2513 if (idx) { 2514 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2515 } 2516 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2517 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2518 2519 for (i=0; i<A->rmap->n; i++){ 2520 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2521 va[i] = vb[i]; 2522 if (idx) idx[i] = a->garray[idxb[i]]; 2523 } 2524 } 2525 2526 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2527 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2528 if (idxb) { 2529 ierr = PetscFree(idxb);CHKERRQ(ierr); 2530 } 2531 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2532 PetscFunctionReturn(0); 2533 } 2534 2535 #undef __FUNCT__ 2536 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2537 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2538 { 2539 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2540 PetscInt n = A->rmap->n; 2541 PetscInt cstart = A->cmap->rstart; 2542 PetscInt *cmap = mat->garray; 2543 PetscInt *diagIdx, *offdiagIdx; 2544 Vec diagV, offdiagV; 2545 PetscScalar *a, *diagA, *offdiagA; 2546 PetscInt r; 2547 PetscErrorCode ierr; 2548 2549 PetscFunctionBegin; 2550 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2551 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2552 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2553 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2554 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2555 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2556 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2557 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2558 for(r = 0; r < n; ++r) { 2559 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2560 a[r] = diagA[r]; 2561 idx[r] = cstart + diagIdx[r]; 2562 } else { 2563 a[r] = offdiagA[r]; 2564 idx[r] = cmap[offdiagIdx[r]]; 2565 } 2566 } 2567 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2568 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2569 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2570 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2571 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2572 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 #undef __FUNCT__ 2577 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2578 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2579 { 2580 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2581 PetscInt n = A->rmap->n; 2582 PetscInt cstart = A->cmap->rstart; 2583 PetscInt *cmap = mat->garray; 2584 PetscInt *diagIdx, *offdiagIdx; 2585 Vec diagV, offdiagV; 2586 PetscScalar *a, *diagA, *offdiagA; 2587 PetscInt r; 2588 PetscErrorCode ierr; 2589 2590 PetscFunctionBegin; 2591 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2592 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2593 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2594 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2595 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2596 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2597 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2598 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2599 for(r = 0; r < n; ++r) { 2600 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2601 a[r] = diagA[r]; 2602 idx[r] = cstart + diagIdx[r]; 2603 } else { 2604 a[r] = offdiagA[r]; 2605 idx[r] = cmap[offdiagIdx[r]]; 2606 } 2607 } 2608 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2609 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2610 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2611 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2612 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2613 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2614 PetscFunctionReturn(0); 2615 } 2616 2617 #undef __FUNCT__ 2618 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2619 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2620 { 2621 PetscErrorCode ierr; 2622 Mat *dummy; 2623 2624 PetscFunctionBegin; 2625 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2626 *newmat = *dummy; 2627 ierr = PetscFree(dummy);CHKERRQ(ierr); 2628 PetscFunctionReturn(0); 2629 } 2630 2631 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2632 /* -------------------------------------------------------------------*/ 2633 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2634 MatGetRow_MPIAIJ, 2635 MatRestoreRow_MPIAIJ, 2636 MatMult_MPIAIJ, 2637 /* 4*/ MatMultAdd_MPIAIJ, 2638 MatMultTranspose_MPIAIJ, 2639 MatMultTransposeAdd_MPIAIJ, 2640 #ifdef PETSC_HAVE_PBGL 2641 MatSolve_MPIAIJ, 2642 #else 2643 0, 2644 #endif 2645 0, 2646 0, 2647 /*10*/ 0, 2648 0, 2649 0, 2650 
MatSOR_MPIAIJ, 2651 MatTranspose_MPIAIJ, 2652 /*15*/ MatGetInfo_MPIAIJ, 2653 MatEqual_MPIAIJ, 2654 MatGetDiagonal_MPIAIJ, 2655 MatDiagonalScale_MPIAIJ, 2656 MatNorm_MPIAIJ, 2657 /*20*/ MatAssemblyBegin_MPIAIJ, 2658 MatAssemblyEnd_MPIAIJ, 2659 MatSetOption_MPIAIJ, 2660 MatZeroEntries_MPIAIJ, 2661 /*24*/ MatZeroRows_MPIAIJ, 2662 0, 2663 #ifdef PETSC_HAVE_PBGL 2664 0, 2665 #else 2666 0, 2667 #endif 2668 0, 2669 0, 2670 /*29*/ MatSetUpPreallocation_MPIAIJ, 2671 #ifdef PETSC_HAVE_PBGL 2672 0, 2673 #else 2674 0, 2675 #endif 2676 0, 2677 0, 2678 0, 2679 /*34*/ MatDuplicate_MPIAIJ, 2680 0, 2681 0, 2682 0, 2683 0, 2684 /*39*/ MatAXPY_MPIAIJ, 2685 MatGetSubMatrices_MPIAIJ, 2686 MatIncreaseOverlap_MPIAIJ, 2687 MatGetValues_MPIAIJ, 2688 MatCopy_MPIAIJ, 2689 /*44*/ MatGetRowMax_MPIAIJ, 2690 MatScale_MPIAIJ, 2691 0, 2692 0, 2693 0, 2694 /*49*/ MatSetBlockSize_MPIAIJ, 2695 0, 2696 0, 2697 0, 2698 0, 2699 /*54*/ MatFDColoringCreate_MPIAIJ, 2700 0, 2701 MatSetUnfactored_MPIAIJ, 2702 MatPermute_MPIAIJ, 2703 0, 2704 /*59*/ MatGetSubMatrix_MPIAIJ, 2705 MatDestroy_MPIAIJ, 2706 MatView_MPIAIJ, 2707 0, 2708 0, 2709 /*64*/ 0, 2710 0, 2711 0, 2712 0, 2713 0, 2714 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2715 MatGetRowMinAbs_MPIAIJ, 2716 0, 2717 MatSetColoring_MPIAIJ, 2718 #if defined(PETSC_HAVE_ADIC) 2719 MatSetValuesAdic_MPIAIJ, 2720 #else 2721 0, 2722 #endif 2723 MatSetValuesAdifor_MPIAIJ, 2724 /*75*/ MatFDColoringApply_AIJ, 2725 0, 2726 0, 2727 0, 2728 0, 2729 /*80*/ 0, 2730 0, 2731 0, 2732 /*83*/ MatLoad_MPIAIJ, 2733 0, 2734 0, 2735 0, 2736 0, 2737 0, 2738 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2739 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2740 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2741 MatPtAP_Basic, 2742 MatPtAPSymbolic_MPIAIJ, 2743 /*94*/ MatPtAPNumeric_MPIAIJ, 2744 0, 2745 0, 2746 0, 2747 0, 2748 /*99*/ 0, 2749 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2750 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2751 MatConjugate_MPIAIJ, 2752 0, 2753 /*104*/MatSetValuesRow_MPIAIJ, 2754 MatRealPart_MPIAIJ, 2755 MatImaginaryPart_MPIAIJ, 2756 0, 2757 0, 2758 /*109*/0, 2759 MatGetRedundantMatrix_MPIAIJ, 2760 MatGetRowMin_MPIAIJ, 2761 0, 2762 0, 2763 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2764 0, 2765 0, 2766 0, 2767 0, 2768 /*119*/0, 2769 0, 2770 0, 2771 0, 2772 MatGetMultiProcBlock_MPIAIJ 2773 }; 2774 2775 /* ----------------------------------------------------------------------------------------*/ 2776 2777 EXTERN_C_BEGIN 2778 #undef __FUNCT__ 2779 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2780 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2781 { 2782 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2783 PetscErrorCode ierr; 2784 2785 PetscFunctionBegin; 2786 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2787 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2788 PetscFunctionReturn(0); 2789 } 2790 EXTERN_C_END 2791 2792 EXTERN_C_BEGIN 2793 #undef __FUNCT__ 2794 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2795 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2796 { 2797 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2798 PetscErrorCode ierr; 2799 2800 PetscFunctionBegin; 2801 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2802 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2803 PetscFunctionReturn(0); 2804 } 2805 EXTERN_C_END 2806 2807 EXTERN_C_BEGIN 2808 #undef __FUNCT__ 2809 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2810 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2811 { 2812 Mat_MPIAIJ *b; 2813 PetscErrorCode ierr; 2814 PetscInt 
i; 2815 2816 PetscFunctionBegin; 2817 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2818 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2819 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2820 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2821 2822 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2823 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2824 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2825 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2826 if (d_nnz) { 2827 for (i=0; i<B->rmap->n; i++) { 2828 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2829 } 2830 } 2831 if (o_nnz) { 2832 for (i=0; i<B->rmap->n; i++) { 2833 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2834 } 2835 } 2836 b = (Mat_MPIAIJ*)B->data; 2837 2838 if (!B->preallocated) { 2839 /* Explicitly create 2 MATSEQAIJ matrices. */ 2840 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2841 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2842 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2843 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2844 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2845 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2846 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2847 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2848 } 2849 2850 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2851 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2852 B->preallocated = PETSC_TRUE; 2853 PetscFunctionReturn(0); 2854 } 2855 EXTERN_C_END 2856 2857 #undef __FUNCT__ 2858 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2859 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2860 { 2861 Mat mat; 2862 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2863 PetscErrorCode ierr; 2864 2865 PetscFunctionBegin; 2866 *newmat = 0; 2867 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2868 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2869 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2870 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2871 a = (Mat_MPIAIJ*)mat->data; 2872 2873 mat->factortype = matin->factortype; 2874 mat->rmap->bs = matin->rmap->bs; 2875 mat->assembled = PETSC_TRUE; 2876 mat->insertmode = NOT_SET_VALUES; 2877 mat->preallocated = PETSC_TRUE; 2878 2879 a->size = oldmat->size; 2880 a->rank = oldmat->rank; 2881 a->donotstash = oldmat->donotstash; 2882 a->roworiented = oldmat->roworiented; 2883 a->rowindices = 0; 2884 a->rowvalues = 0; 2885 a->getrowactive = PETSC_FALSE; 2886 2887 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2888 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2889 2890 if (oldmat->colmap) { 2891 #if defined (PETSC_USE_CTABLE) 2892 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2893 #else 2894 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2895 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2896 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/*
  Allows sending/receiving larger messages than 2 gigabytes in a single call
*/
static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
{
  int             ierr;
  static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
  PetscInt        i,numchunks;
  PetscMPIInt     icnt;

  numchunks = cnt/CHUNKSIZE + 1;
  for (i=0; i<numchunks; i++) {
    icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
    ierr = MPI_Send(mess,icnt,type,to,tag,comm);
    if (type == MPIU_INT) {
      mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
    } else if (type == MPIU_SCALAR) {
      mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
    } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
  }
  return 0;
}
static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
{
  int             ierr;
  static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
  MPI_Status      status;
  PetscInt        i,numchunks;
  PetscMPIInt     icnt;

  numchunks = cnt/CHUNKSIZE + 1;
  for (i=0; i<numchunks; i++) {
    icnt = PetscMPIIntCast((i < numchunks-1) ?
CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 2953 ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status); 2954 if (type == MPIU_INT) { 2955 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 2956 } else if (type == MPIU_SCALAR) { 2957 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 2958 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 2959 } 2960 return 0; 2961 } 2962 2963 #undef __FUNCT__ 2964 #define __FUNCT__ "MatLoad_MPIAIJ" 2965 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2966 { 2967 PetscScalar *vals,*svals; 2968 MPI_Comm comm = ((PetscObject)viewer)->comm; 2969 PetscErrorCode ierr; 2970 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2971 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 2972 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2973 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2974 PetscInt cend,cstart,n,*rowners,sizesset=1; 2975 int fd; 2976 2977 PetscFunctionBegin; 2978 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2979 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2980 if (!rank) { 2981 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2982 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2983 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2984 } 2985 2986 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 2987 2988 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2989 M = header[1]; N = header[2]; 2990 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2991 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 2992 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 2993 2994 /* If global sizes are set, check if they are consistent with that given in the file */ 2995 if (sizesset) { 2996 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 2997 } 2998 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2999 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3000 3001 /* determine ownership of all rows */ 3002 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3003 else m = newMat->rmap->n; /* Set by user */ 3004 3005 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3006 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3007 3008 /* First process needs enough room for process with most rows */ 3009 if (!rank) { 3010 mmax = rowners[1]; 3011 for (i=2; i<size; i++) { 3012 mmax = PetscMax(mmax,rowners[i]); 3013 } 3014 } else mmax = m; 3015 3016 rowners[0] = 0; 3017 for (i=2; i<=size; i++) { 3018 rowners[i] += rowners[i-1]; 3019 } 3020 rstart = rowners[rank]; 3021 rend = rowners[rank+1]; 3022 3023 /* distribute row lengths to all processors */ 3024 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3025 if (!rank) { 3026 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3027 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3028 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3029 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3030 for (j=0; 
j<m; j++) { 3031 procsnz[0] += ourlens[j]; 3032 } 3033 for (i=1; i<size; i++) { 3034 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3035 /* calculate the number of nonzeros on each processor */ 3036 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3037 procsnz[i] += rowlengths[j]; 3038 } 3039 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3040 } 3041 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3042 } else { 3043 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3044 } 3045 3046 if (!rank) { 3047 /* determine max buffer needed and allocate it */ 3048 maxnz = 0; 3049 for (i=0; i<size; i++) { 3050 maxnz = PetscMax(maxnz,procsnz[i]); 3051 } 3052 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3053 3054 /* read in my part of the matrix column indices */ 3055 nz = procsnz[0]; 3056 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3057 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3058 3059 /* read in every one elses and ship off */ 3060 for (i=1; i<size; i++) { 3061 nz = procsnz[i]; 3062 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3063 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3064 } 3065 ierr = PetscFree(cols);CHKERRQ(ierr); 3066 } else { 3067 /* determine buffer space needed for message */ 3068 nz = 0; 3069 for (i=0; i<m; i++) { 3070 nz += ourlens[i]; 3071 } 3072 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3073 3074 /* receive message of column indices*/ 3075 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3076 } 3077 3078 /* determine column ownership if matrix is not square */ 3079 if (N != M) { 3080 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3081 else n = newMat->cmap->n; 3082 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3083 cstart = cend - n; 3084 } else { 3085 cstart = rstart; 3086 cend = rend; 3087 n = cend - cstart; 3088 } 3089 3090 /* loop over local rows, determining number of off diagonal entries */ 3091 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3092 jj = 0; 3093 for (i=0; i<m; i++) { 3094 for (j=0; j<ourlens[i]; j++) { 3095 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3096 jj++; 3097 } 3098 } 3099 3100 for (i=0; i<m; i++) { 3101 ourlens[i] -= offlens[i]; 3102 } 3103 if (!sizesset) { 3104 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3105 } 3106 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3107 3108 for (i=0; i<m; i++) { 3109 ourlens[i] += offlens[i]; 3110 } 3111 3112 if (!rank) { 3113 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3114 3115 /* read in my part of the matrix numerical values */ 3116 nz = procsnz[0]; 3117 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3118 3119 /* insert into matrix */ 3120 jj = rstart; 3121 smycols = mycols; 3122 svals = vals; 3123 for (i=0; i<m; i++) { 3124 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3125 smycols += ourlens[i]; 3126 svals += ourlens[i]; 3127 jj++; 3128 } 3129 3130 /* read in other processors and ship out */ 3131 for (i=1; i<size; i++) { 3132 nz = procsnz[i]; 3133 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3134 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3135 } 3136 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3137 } else { 3138 /* receive numeric values */ 3139 ierr 
= PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3140 3141 /* receive message of values*/ 3142 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3143 3144 /* insert into matrix */ 3145 jj = rstart; 3146 smycols = mycols; 3147 svals = vals; 3148 for (i=0; i<m; i++) { 3149 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3150 smycols += ourlens[i]; 3151 svals += ourlens[i]; 3152 jj++; 3153 } 3154 } 3155 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3156 ierr = PetscFree(vals);CHKERRQ(ierr); 3157 ierr = PetscFree(mycols);CHKERRQ(ierr); 3158 ierr = PetscFree(rowners);CHKERRQ(ierr); 3159 3160 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3161 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3162 PetscFunctionReturn(0); 3163 } 3164 3165 #undef __FUNCT__ 3166 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3167 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3168 { 3169 PetscErrorCode ierr; 3170 IS iscol_local; 3171 PetscInt csize; 3172 3173 PetscFunctionBegin; 3174 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3175 if (call == MAT_REUSE_MATRIX) { 3176 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3177 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3178 } else { 3179 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3180 } 3181 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3182 if (call == MAT_INITIAL_MATRIX) { 3183 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3184 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3185 } 3186 PetscFunctionReturn(0); 3187 } 3188 3189 #undef __FUNCT__ 3190 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3191 /* 3192 Not great since it makes two copies of the submatrix, first an SeqAIJ 3193 in local and then by concatenating the local matrices the end result. 3194 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3195 3196 Note: This requires a sequential iscol with all indices. 
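 Callers normally obtain such an iscol by gathering the distributed column index set first, as MatGetSubMatrix_MPIAIJ() above does; a minimal sketch (reuse handling omitted) is

       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
       ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);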
3197 */ 3198 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3199 { 3200 PetscErrorCode ierr; 3201 PetscMPIInt rank,size; 3202 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3203 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3204 Mat *local,M,Mreuse; 3205 MatScalar *vwork,*aa; 3206 MPI_Comm comm = ((PetscObject)mat)->comm; 3207 Mat_SeqAIJ *aij; 3208 3209 3210 PetscFunctionBegin; 3211 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3212 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3213 3214 if (call == MAT_REUSE_MATRIX) { 3215 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3216 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3217 local = &Mreuse; 3218 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3219 } else { 3220 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3221 Mreuse = *local; 3222 ierr = PetscFree(local);CHKERRQ(ierr); 3223 } 3224 3225 /* 3226 m - number of local rows 3227 n - number of columns (same on all processors) 3228 rstart - first row in new global matrix generated 3229 */ 3230 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3231 if (call == MAT_INITIAL_MATRIX) { 3232 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3233 ii = aij->i; 3234 jj = aij->j; 3235 3236 /* 3237 Determine the number of non-zeros in the diagonal and off-diagonal 3238 portions of the matrix in order to do correct preallocation 3239 */ 3240 3241 /* first get start and end of "diagonal" columns */ 3242 if (csize == PETSC_DECIDE) { 3243 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3244 if (mglobal == n) { /* square matrix */ 3245 nlocal = m; 3246 } else { 3247 nlocal = n/size + ((n % size) > rank); 3248 } 3249 } else { 3250 nlocal = csize; 3251 } 3252 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3253 rstart = rend - nlocal; 3254 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3255 3256 /* next, compute all the lengths */ 3257 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3258 olens = dlens + m; 3259 for (i=0; i<m; i++) { 3260 jend = ii[i+1] - ii[i]; 3261 olen = 0; 3262 dlen = 0; 3263 for (j=0; j<jend; j++) { 3264 if (*jj < rstart || *jj >= rend) olen++; 3265 else dlen++; 3266 jj++; 3267 } 3268 olens[i] = olen; 3269 dlens[i] = dlen; 3270 } 3271 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3272 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3273 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3274 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3275 ierr = PetscFree(dlens);CHKERRQ(ierr); 3276 } else { 3277 PetscInt ml,nl; 3278 3279 M = *newmat; 3280 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3281 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3282 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3283 /* 3284 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3285 rather than the slower MatSetValues(). 
3286 */ 3287 M->was_assembled = PETSC_TRUE; 3288 M->assembled = PETSC_FALSE; 3289 } 3290 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3291 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3292 ii = aij->i; 3293 jj = aij->j; 3294 aa = aij->a; 3295 for (i=0; i<m; i++) { 3296 row = rstart + i; 3297 nz = ii[i+1] - ii[i]; 3298 cwork = jj; jj += nz; 3299 vwork = aa; aa += nz; 3300 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3301 } 3302 3303 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3304 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3305 *newmat = M; 3306 3307 /* save submatrix used in processor for next request */ 3308 if (call == MAT_INITIAL_MATRIX) { 3309 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3310 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3311 } 3312 3313 PetscFunctionReturn(0); 3314 } 3315 3316 EXTERN_C_BEGIN 3317 #undef __FUNCT__ 3318 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3319 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3320 { 3321 PetscInt m,cstart, cend,j,nnz,i,d; 3322 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3323 const PetscInt *JJ; 3324 PetscScalar *values; 3325 PetscErrorCode ierr; 3326 3327 PetscFunctionBegin; 3328 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3329 3330 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3331 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3332 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3333 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3334 m = B->rmap->n; 3335 cstart = B->cmap->rstart; 3336 cend = B->cmap->rend; 3337 rstart = B->rmap->rstart; 3338 3339 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3340 3341 #if defined(PETSC_USE_DEBUG) 3342 for (i=0; i<m; i++) { 3343 nnz = Ii[i+1]- Ii[i]; 3344 JJ = J + Ii[i]; 3345 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3346 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3347 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3348 } 3349 #endif 3350 3351 for (i=0; i<m; i++) { 3352 nnz = Ii[i+1]- Ii[i]; 3353 JJ = J + Ii[i]; 3354 nnz_max = PetscMax(nnz_max,nnz); 3355 d = 0; 3356 for (j=0; j<nnz; j++) { 3357 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3358 } 3359 d_nnz[i] = d; 3360 o_nnz[i] = nnz - d; 3361 } 3362 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3363 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3364 3365 if (v) values = (PetscScalar*)v; 3366 else { 3367 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3368 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3369 } 3370 3371 for (i=0; i<m; i++) { 3372 ii = i + rstart; 3373 nnz = Ii[i+1]- Ii[i]; 3374 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3375 } 3376 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3377 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3378 3379 if (!v) { 3380 ierr = PetscFree(values);CHKERRQ(ierr); 3381 } 3382 PetscFunctionReturn(0); 3383 } 3384 EXTERN_C_END 3385 3386 #undef __FUNCT__ 3387 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3388 /*@ 3389 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3390 (the default parallel PETSc format). 3391 3392 Collective on MPI_Comm 3393 3394 Input Parameters: 3395 + B - the matrix 3396 . i - the indices into j for the start of each local row (starts with zero) 3397 . j - the column indices for each local row (starts with zero) 3398 - v - optional values in the matrix 3399 3400 Level: developer 3401 3402 Notes: 3403 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3404 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3405 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3406 3407 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3408 3409 The format which is used for the sparse matrix input, is equivalent to a 3410 row-major ordering.. i.e for the following matrix, the input data expected is 3411 as shown: 3412 3413 1 0 0 3414 2 0 3 P0 3415 ------- 3416 4 5 6 P1 3417 3418 Process0 [P0]: rows_owned=[0,1] 3419 i = {0,1,3} [size = nrow+1 = 2+1] 3420 j = {0,0,2} [size = nz = 6] 3421 v = {1,2,3} [size = nz = 6] 3422 3423 Process1 [P1]: rows_owned=[2] 3424 i = {0,3} [size = nrow+1 = 1+1] 3425 j = {0,1,2} [size = nz = 6] 3426 v = {4,5,6} [size = nz = 6] 3427 3428 .keywords: matrix, aij, compressed row, sparse, parallel 3429 3430 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3431 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3432 @*/ 3433 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3434 { 3435 PetscErrorCode ierr; 3436 3437 PetscFunctionBegin; 3438 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3439 PetscFunctionReturn(0); 3440 } 3441 3442 #undef __FUNCT__ 3443 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3444 /*@C 3445 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3446 (the default parallel PETSc format). For good matrix assembly performance 3447 the user should preallocate the matrix storage by setting the parameters 3448 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3449 performance can be increased by more than a factor of 50. 3450 3451 Collective on MPI_Comm 3452 3453 Input Parameters: 3454 + A - the matrix 3455 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3456 (same value is used for all local rows) 3457 . d_nnz - array containing the number of nonzeros in the various rows of the 3458 DIAGONAL portion of the local submatrix (possibly different for each row) 3459 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3460 The size of this array is equal to the number of local rows, i.e 'm'. 3461 You must leave room for the diagonal entry even if it is zero. 3462 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3463 submatrix (same value is used for all local rows). 3464 - o_nnz - array containing the number of nonzeros in the various rows of the 3465 OFF-DIAGONAL portion of the local submatrix (possibly different for 3466 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3467 structure. The size of this array is equal to the number 3468 of local rows, i.e 'm'. 3469 3470 If the *_nnz parameter is given then the *_nz parameter is ignored 3471 3472 The AIJ format (also called the Yale sparse matrix format or 3473 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3474 storage. The stored row and column indices begin with zero. 3475 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3476 3477 The parallel matrix is partitioned such that the first m0 rows belong to 3478 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3479 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3480 3481 The DIAGONAL portion of the local submatrix of a processor can be defined 3482 as the submatrix which is obtained by extraction the part corresponding to 3483 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3484 first row that belongs to the processor, r2 is the last row belonging to 3485 the this processor, and c1-c2 is range of indices of the local part of a 3486 vector suitable for applying the matrix to. This is an mxn matrix. In the 3487 common case of a square matrix, the row and column ranges are the same and 3488 the DIAGONAL part is also square. The remaining portion of the local 3489 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3490 3491 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3492 3493 You can call MatGetInfo() to get information on how effective the preallocation was; 3494 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3495 You can also run with the option -info and look for messages with the string 3496 malloc in them to see if additional memory allocation was needed. 3497 3498 Example usage: 3499 3500 Consider the following 8x8 matrix with 34 non-zero values, that is 3501 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3502 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3503 as follows: 3504 3505 .vb 3506 1 2 0 | 0 3 0 | 0 4 3507 Proc0 0 5 6 | 7 0 0 | 8 0 3508 9 0 10 | 11 0 0 | 12 0 3509 ------------------------------------- 3510 13 0 14 | 15 16 17 | 0 0 3511 Proc1 0 18 0 | 19 20 21 | 0 0 3512 0 0 0 | 22 23 0 | 24 0 3513 ------------------------------------- 3514 Proc2 25 26 27 | 0 0 28 | 29 0 3515 30 0 0 | 31 32 33 | 0 34 3516 .ve 3517 3518 This can be represented as a collection of submatrices as: 3519 3520 .vb 3521 A B C 3522 D E F 3523 G H I 3524 .ve 3525 3526 Where the submatrices A,B,C are owned by proc0, D,E,F are 3527 owned by proc1, G,H,I are owned by proc2. 3528 3529 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3530 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3531 The 'M','N' parameters are 8,8, and have the same values on all procs. 3532 3533 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3534 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3535 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
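 A typical call sequence for this routine, sketched with placeholder names (the d_nnz/o_nnz values for this example are derived below), is:
.vb
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve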
3536 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3537 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3538 matrix, and [DF] as another SeqAIJ matrix. 3539 3540 When d_nz, o_nz parameters are specified, d_nz storage elements are 3541 allocated for every row of the local diagonal submatrix, and o_nz 3542 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3543 One way to choose d_nz and o_nz is to use the maximum nonzeros per local 3544 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3545 In this case, the values of d_nz,o_nz are: 3546 .vb 3547 proc0 : dnz = 2, o_nz = 2 3548 proc1 : dnz = 3, o_nz = 2 3549 proc2 : dnz = 1, o_nz = 4 3550 .ve 3551 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3552 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3553 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3554 34 values. 3555 3556 When d_nnz, o_nnz parameters are specified, the storage is specified 3557 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3558 In the above case the values for d_nnz,o_nnz are: 3559 .vb 3560 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3561 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3562 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3563 .ve 3564 Here the space allocated is the sum of all the above values, i.e. 34, and 3565 hence pre-allocation is perfect. 3566 3567 Level: intermediate 3568 3569 .keywords: matrix, aij, compressed row, sparse, parallel 3570 3571 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3572 MPIAIJ, MatGetInfo() 3573 @*/ 3574 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3575 { 3576 PetscErrorCode ierr; 3577 3578 PetscFunctionBegin; 3579 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3580 PetscFunctionReturn(0); 3581 } 3582 3583 #undef __FUNCT__ 3584 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3585 /*@ 3586 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 3587 CSR format. 3588 3589 Collective on MPI_Comm 3590 3591 Input Parameters: 3592 + comm - MPI communicator 3593 . m - number of local rows (Cannot be PETSC_DECIDE) 3594 . n - This value should be the same as the local size used in creating the 3595 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3596 calculated if N is given) For square matrices n is almost always m. 3597 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3598 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3599 . i - row indices 3600 . j - column indices 3601 - a - matrix values 3602 3603 Output Parameter: 3604 . mat - the matrix 3605 3606 Level: intermediate 3607 3608 Notes: 3609 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3610 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3611 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3612 3613 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
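 A minimal call sketch, assuming each process has already assembled the CSR triplet i, j, a for its m local rows (all names here are placeholders), is:
.vb
      ierr = MatCreateMPIAIJWithArrays(comm,m,PETSC_DECIDE,PETSC_DETERMINE,N,i,j,a,&A);CHKERRQ(ierr);
.ve
 The expected layout of i, j, and a is illustrated in the example below.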
3614 3615 The format which is used for the sparse matrix input, is equivalent to a 3616 row-major ordering.. i.e for the following matrix, the input data expected is 3617 as shown: 3618 3619 1 0 0 3620 2 0 3 P0 3621 ------- 3622 4 5 6 P1 3623 3624 Process0 [P0]: rows_owned=[0,1] 3625 i = {0,1,3} [size = nrow+1 = 2+1] 3626 j = {0,0,2} [size = nz = 6] 3627 v = {1,2,3} [size = nz = 6] 3628 3629 Process1 [P1]: rows_owned=[2] 3630 i = {0,3} [size = nrow+1 = 1+1] 3631 j = {0,1,2} [size = nz = 6] 3632 v = {4,5,6} [size = nz = 6] 3633 3634 .keywords: matrix, aij, compressed row, sparse, parallel 3635 3636 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3637 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3638 @*/ 3639 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3640 { 3641 PetscErrorCode ierr; 3642 3643 PetscFunctionBegin; 3644 if (i[0]) { 3645 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3646 } 3647 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3648 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3649 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3650 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3651 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3652 PetscFunctionReturn(0); 3653 } 3654 3655 #undef __FUNCT__ 3656 #define __FUNCT__ "MatCreateMPIAIJ" 3657 /*@C 3658 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3659 (the default parallel PETSc format). For good matrix assembly performance 3660 the user should preallocate the matrix storage by setting the parameters 3661 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3662 performance can be increased by more than a factor of 50. 3663 3664 Collective on MPI_Comm 3665 3666 Input Parameters: 3667 + comm - MPI communicator 3668 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3669 This value should be the same as the local size used in creating the 3670 y vector for the matrix-vector product y = Ax. 3671 . n - This value should be the same as the local size used in creating the 3672 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3673 calculated if N is given) For square matrices n is almost always m. 3674 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3675 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3676 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3677 (same value is used for all local rows) 3678 . d_nnz - array containing the number of nonzeros in the various rows of the 3679 DIAGONAL portion of the local submatrix (possibly different for each row) 3680 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3681 The size of this array is equal to the number of local rows, i.e 'm'. 3682 You must leave room for the diagonal entry even if it is zero. 3683 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3684 submatrix (same value is used for all local rows). 
3685 - o_nnz - array containing the number of nonzeros in the various rows of the 3686 OFF-DIAGONAL portion of the local submatrix (possibly different for 3687 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3688 structure. The size of this array is equal to the number 3689 of local rows, i.e. 'm'. 3690 3691 Output Parameter: 3692 . A - the matrix 3693 3694 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3695 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3696 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3697 3698 Notes: 3699 If the *_nnz parameter is given then the *_nz parameter is ignored. 3700 3701 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3702 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3703 storage requirements for this matrix. 3704 3705 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3706 processor then it must be used on all processors that share the object for 3707 that argument. 3708 3709 The user MUST specify either the local or global matrix dimensions 3710 (possibly both). 3711 3712 The parallel matrix is partitioned across processors such that the 3713 first m0 rows belong to process 0, the next m1 rows belong to 3714 process 1, the next m2 rows belong to process 2, etc., where 3715 m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores 3716 values corresponding to an [m x N] submatrix. 3717 3718 The columns are logically partitioned with the n0 columns belonging 3719 to 0th partition, the next n1 columns belonging to the next 3720 partition, etc., where n0,n1,n2... are the input parameter 'n'. 3721 3722 The DIAGONAL portion of the local submatrix on any given processor 3723 is the submatrix corresponding to the rows and columns m,n 3724 corresponding to the given processor, i.e. the diagonal matrix on 3725 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3726 etc. The remaining portion of the local submatrix [m x (N-n)] 3727 constitutes the OFF-DIAGONAL portion. The example below better 3728 illustrates this concept. 3729 3730 For a square global matrix we define each processor's diagonal portion 3731 to be its local rows and the corresponding columns (a square submatrix); 3732 each processor's off-diagonal portion encompasses the remainder of the 3733 local matrix (a rectangular submatrix). 3734 3735 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3736 3737 When calling this routine with a single process communicator, a matrix of 3738 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3739 type of communicator, use the construction mechanism: 3740 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3741 3742 By default, this format uses inodes (identical nodes) when possible. 3743 We search for consecutive rows with the same nonzero structure, thereby 3744 reusing matrix information to achieve increased efficiency. 3745 3746 Options Database Keys: 3747 + -mat_no_inode - Do not use inodes 3748 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3749 - -mat_aij_oneindex - Internally use indexing starting at 1 3750 rather than 0. Note that when calling MatSetValues(), 3751 the user still MUST index entries starting at 0! 3752 3753 3754 Example usage: 3755 3756 Consider the following 8x8 matrix with 34 non-zero values, that is 3757 assembled across 3 processors.
Let's assume that proc0 owns 3 rows, 3758 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3759 as follows: 3760 3761 .vb 3762 1 2 0 | 0 3 0 | 0 4 3763 Proc0 0 5 6 | 7 0 0 | 8 0 3764 9 0 10 | 11 0 0 | 12 0 3765 ------------------------------------- 3766 13 0 14 | 15 16 17 | 0 0 3767 Proc1 0 18 0 | 19 20 21 | 0 0 3768 0 0 0 | 22 23 0 | 24 0 3769 ------------------------------------- 3770 Proc2 25 26 27 | 0 0 28 | 29 0 3771 30 0 0 | 31 32 33 | 0 34 3772 .ve 3773 3774 This can be represented as a collection of submatrices as: 3775 3776 .vb 3777 A B C 3778 D E F 3779 G H I 3780 .ve 3781 3782 Where the submatrices A,B,C are owned by proc0, D,E,F are 3783 owned by proc1, G,H,I are owned by proc2. 3784 3785 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3786 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3787 The 'M','N' parameters are 8,8, and have the same values on all procs. 3788 3789 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3790 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3791 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3792 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3793 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3794 matrix, and [DF] as another SeqAIJ matrix. 3795 3796 When d_nz, o_nz parameters are specified, d_nz storage elements are 3797 allocated for every row of the local diagonal submatrix, and o_nz 3798 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3799 One way to choose d_nz and o_nz is to use the maximum nonzeros per local 3800 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3801 In this case, the values of d_nz,o_nz are: 3802 .vb 3803 proc0 : dnz = 2, o_nz = 2 3804 proc1 : dnz = 3, o_nz = 2 3805 proc2 : dnz = 1, o_nz = 4 3806 .ve 3807 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3808 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3809 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3810 34 values. 3811 3812 When d_nnz, o_nnz parameters are specified, the storage is specified 3813 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3814 In the above case the values for d_nnz,o_nnz are: 3815 .vb 3816 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3817 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3818 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3819 .ve 3820 Here the space allocated is the sum of all the above values, i.e. 34, and 3821 hence pre-allocation is perfect.
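 With these values, the call on proc0 of this example would be, as a sketch (A is a placeholder name):
.vb
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve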
3822 3823 Level: intermediate 3824 3825 .keywords: matrix, aij, compressed row, sparse, parallel 3826 3827 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3828 MPIAIJ, MatCreateMPIAIJWithArrays() 3829 @*/ 3830 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3831 { 3832 PetscErrorCode ierr; 3833 PetscMPIInt size; 3834 3835 PetscFunctionBegin; 3836 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3837 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3838 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3839 if (size > 1) { 3840 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3841 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3842 } else { 3843 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3844 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3845 } 3846 PetscFunctionReturn(0); 3847 } 3848 3849 #undef __FUNCT__ 3850 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3851 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3852 { 3853 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3854 3855 PetscFunctionBegin; 3856 *Ad = a->A; 3857 *Ao = a->B; 3858 *colmap = a->garray; 3859 PetscFunctionReturn(0); 3860 } 3861 3862 #undef __FUNCT__ 3863 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3864 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3865 { 3866 PetscErrorCode ierr; 3867 PetscInt i; 3868 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3869 3870 PetscFunctionBegin; 3871 if (coloring->ctype == IS_COLORING_GLOBAL) { 3872 ISColoringValue *allcolors,*colors; 3873 ISColoring ocoloring; 3874 3875 /* set coloring for diagonal portion */ 3876 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3877 3878 /* set coloring for off-diagonal portion */ 3879 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3880 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3881 for (i=0; i<a->B->cmap->n; i++) { 3882 colors[i] = allcolors[a->garray[i]]; 3883 } 3884 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3885 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3886 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3887 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3888 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3889 ISColoringValue *colors; 3890 PetscInt *larray; 3891 ISColoring ocoloring; 3892 3893 /* set coloring for diagonal portion */ 3894 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3895 for (i=0; i<a->A->cmap->n; i++) { 3896 larray[i] = i + A->cmap->rstart; 3897 } 3898 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3899 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3900 for (i=0; i<a->A->cmap->n; i++) { 3901 colors[i] = coloring->colors[larray[i]]; 3902 } 3903 ierr = PetscFree(larray);CHKERRQ(ierr); 3904 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3905 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3906 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3907 3908 /* set coloring for off-diagonal portion */ 3909 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3910 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3911 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3912 for (i=0; i<a->B->cmap->n; i++) { 3913 colors[i] = coloring->colors[larray[i]]; 3914 } 3915 ierr = PetscFree(larray);CHKERRQ(ierr); 3916 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3917 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3918 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3919 } else { 3920 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3921 } 3922 3923 PetscFunctionReturn(0); 3924 } 3925 3926 #if defined(PETSC_HAVE_ADIC) 3927 #undef __FUNCT__ 3928 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3929 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3930 { 3931 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3932 PetscErrorCode ierr; 3933 3934 PetscFunctionBegin; 3935 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3936 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3937 PetscFunctionReturn(0); 3938 } 3939 #endif 3940 3941 #undef __FUNCT__ 3942 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3943 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3944 { 3945 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3946 PetscErrorCode ierr; 3947 3948 PetscFunctionBegin; 3949 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3950 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3951 PetscFunctionReturn(0); 3952 } 3953 3954 #undef __FUNCT__ 3955 #define __FUNCT__ "MatMerge" 3956 /*@ 3957 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3958 matrices from each processor 3959 3960 Collective on MPI_Comm 3961 3962 Input Parameters: 3963 + comm - the communicators the parallel matrix will live on 3964 . inmat - the input sequential matrices 3965 . n - number of local columns (or PETSC_DECIDE) 3966 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3967 3968 Output Parameter: 3969 . outmat - the parallel matrix generated 3970 3971 Level: advanced 3972 3973 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
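 The result is always of type MPIAIJ, and the input matrix inmat is released by this routine (note the MatDestroy() call in the implementation below). A typical call, with placeholder names, is:
.vb
      ierr = MatMerge(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalmat);CHKERRQ(ierr);
.ve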
3974 3975 @*/ 3976 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3977 { 3978 PetscErrorCode ierr; 3979 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3980 PetscInt *indx; 3981 PetscScalar *values; 3982 3983 PetscFunctionBegin; 3984 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3985 if (scall == MAT_INITIAL_MATRIX){ 3986 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3987 if (n == PETSC_DECIDE){ 3988 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3989 } 3990 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3991 rstart -= m; 3992 3993 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3994 for (i=0;i<m;i++) { 3995 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3996 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3997 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3998 } 3999 /* This routine will ONLY return MPIAIJ type matrix */ 4000 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4001 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4002 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4003 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4004 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4005 4006 } else if (scall == MAT_REUSE_MATRIX){ 4007 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4008 } else { 4009 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4010 } 4011 4012 for (i=0;i<m;i++) { 4013 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4014 Ii = i + rstart; 4015 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4016 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4017 } 4018 ierr = MatDestroy(inmat);CHKERRQ(ierr); 4019 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4020 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4021 4022 PetscFunctionReturn(0); 4023 } 4024 4025 #undef __FUNCT__ 4026 #define __FUNCT__ "MatFileSplit" 4027 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4028 { 4029 PetscErrorCode ierr; 4030 PetscMPIInt rank; 4031 PetscInt m,N,i,rstart,nnz; 4032 size_t len; 4033 const PetscInt *indx; 4034 PetscViewer out; 4035 char *name; 4036 Mat B; 4037 const PetscScalar *values; 4038 4039 PetscFunctionBegin; 4040 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4041 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4042 /* Should this be the type of the diagonal block of A? 
*/ 4043 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4044 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4045 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4046 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4047 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4048 for (i=0;i<m;i++) { 4049 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4050 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4051 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4052 } 4053 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4054 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4055 4056 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4057 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4058 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4059 sprintf(name,"%s.%d",outfile,rank); 4060 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4061 ierr = PetscFree(name); 4062 ierr = MatView(B,out);CHKERRQ(ierr); 4063 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 4064 ierr = MatDestroy(B);CHKERRQ(ierr); 4065 PetscFunctionReturn(0); 4066 } 4067 4068 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 4069 #undef __FUNCT__ 4070 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4071 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4072 { 4073 PetscErrorCode ierr; 4074 Mat_Merge_SeqsToMPI *merge; 4075 PetscContainer container; 4076 4077 PetscFunctionBegin; 4078 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4079 if (container) { 4080 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4081 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4082 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4083 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4084 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4085 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4086 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4087 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4088 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4089 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4090 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4091 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4092 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4093 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4094 4095 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4096 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4097 } 4098 ierr = PetscFree(merge);CHKERRQ(ierr); 4099 4100 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4101 PetscFunctionReturn(0); 4102 } 4103 4104 #include "../src/mat/utils/freespace.h" 4105 #include "petscbt.h" 4106 4107 #undef __FUNCT__ 4108 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4109 /*@C 4110 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4111 matrices from each processor 4112 4113 Collective on MPI_Comm 4114 4115 Input Parameters: 4116 + comm - the communicators the parallel matrix will live on 4117 . seqmat - the input sequential matrices 4118 . m - number of local rows (or PETSC_DECIDE) 4119 . n - number of local columns (or PETSC_DECIDE) 4120 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4121 4122 Output Parameter: 4123 . mpimat - the parallel matrix generated 4124 4125 Level: advanced 4126 4127 Notes: 4128 The dimensions of the sequential matrix in each processor MUST be the same. 
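 This numeric phase is normally preceded by a symbolic phase; MatMerge_SeqsToMPI() below pairs the two, in essence as:
.vb
      ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
      ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve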
4129 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4130 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4131 @*/ 4132 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4133 { 4134 PetscErrorCode ierr; 4135 MPI_Comm comm=((PetscObject)mpimat)->comm; 4136 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4137 PetscMPIInt size,rank,taga,*len_s; 4138 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4139 PetscInt proc,m; 4140 PetscInt **buf_ri,**buf_rj; 4141 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4142 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4143 MPI_Request *s_waits,*r_waits; 4144 MPI_Status *status; 4145 MatScalar *aa=a->a; 4146 MatScalar **abuf_r,*ba_i; 4147 Mat_Merge_SeqsToMPI *merge; 4148 PetscContainer container; 4149 4150 PetscFunctionBegin; 4151 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4152 4153 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4154 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4155 4156 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4157 if (container) { 4158 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4159 } 4160 bi = merge->bi; 4161 bj = merge->bj; 4162 buf_ri = merge->buf_ri; 4163 buf_rj = merge->buf_rj; 4164 4165 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4166 owners = merge->rowmap->range; 4167 len_s = merge->len_s; 4168 4169 /* send and recv matrix values */ 4170 /*-----------------------------*/ 4171 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4172 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4173 4174 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4175 for (proc=0,k=0; proc<size; proc++){ 4176 if (!len_s[proc]) continue; 4177 i = owners[proc]; 4178 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4179 k++; 4180 } 4181 4182 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4183 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4184 ierr = PetscFree(status);CHKERRQ(ierr); 4185 4186 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4187 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4188 4189 /* insert mat values of mpimat */ 4190 /*----------------------------*/ 4191 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4192 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4193 4194 for (k=0; k<merge->nrecv; k++){ 4195 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4196 nrows = *(buf_ri_k[k]); 4197 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4198 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4199 } 4200 4201 /* set values of ba */ 4202 m = merge->rowmap->n; 4203 for (i=0; i<m; i++) { 4204 arow = owners[rank] + i; 4205 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4206 bnzi = bi[i+1] - bi[i]; 4207 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4208 4209 /* add local non-zero vals of this proc's seqmat into ba */ 4210 anzi = ai[arow+1] - ai[arow]; 4211 aj = a->j + ai[arow]; 4212 aa = a->a + ai[arow]; 4213 nextaj = 0; 4214 for (j=0; nextaj<anzi; j++){ 4215 if (*(bj_i 
+ j) == aj[nextaj]){ /* bcol == acol */ 4216 ba_i[j] += aa[nextaj++]; 4217 } 4218 } 4219 4220 /* add received vals into ba */ 4221 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4222 /* i-th row */ 4223 if (i == *nextrow[k]) { 4224 anzi = *(nextai[k]+1) - *nextai[k]; 4225 aj = buf_rj[k] + *(nextai[k]); 4226 aa = abuf_r[k] + *(nextai[k]); 4227 nextaj = 0; 4228 for (j=0; nextaj<anzi; j++){ 4229 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4230 ba_i[j] += aa[nextaj++]; 4231 } 4232 } 4233 nextrow[k]++; nextai[k]++; 4234 } 4235 } 4236 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4237 } 4238 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4239 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4240 4241 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4242 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4243 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4244 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4245 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4246 PetscFunctionReturn(0); 4247 } 4248 4249 #undef __FUNCT__ 4250 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4251 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4252 { 4253 PetscErrorCode ierr; 4254 Mat B_mpi; 4255 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4256 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4257 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4258 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4259 PetscInt len,proc,*dnz,*onz; 4260 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4261 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4262 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4263 MPI_Status *status; 4264 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4265 PetscBT lnkbt; 4266 Mat_Merge_SeqsToMPI *merge; 4267 PetscContainer container; 4268 4269 PetscFunctionBegin; 4270 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4271 4272 /* make sure it is a PETSc comm */ 4273 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4274 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4275 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4276 4277 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4278 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4279 4280 /* determine row ownership */ 4281 /*---------------------------------------------------------*/ 4282 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4283 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4284 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4285 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4286 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4287 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4288 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4289 4290 m = merge->rowmap->n; 4291 M = merge->rowmap->N; 4292 owners = merge->rowmap->range; 4293 4294 /* determine the number of messages to send, their lengths */ 4295 /*---------------------------------------------------------*/ 4296 len_s = merge->len_s; 4297 4298 len = 0; /* length of buf_si[] */ 4299 merge->nsend = 0; 4300 for (proc=0; proc<size; proc++){ 4301 len_si[proc] = 0; 4302 if (proc == rank){ 4303 len_s[proc] = 0; 4304 } else { 4305 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4306 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4307 } 4308 if (len_s[proc]) { 4309 merge->nsend++; 4310 nrows = 0; 4311 for (i=owners[proc]; i<owners[proc+1]; i++){ 4312 if (ai[i+1] > ai[i]) nrows++; 4313 } 4314 len_si[proc] = 2*(nrows+1); 4315 len += len_si[proc]; 4316 } 4317 } 4318 4319 /* determine the number and length of messages to receive for ij-structure */ 4320 /*-------------------------------------------------------------------------*/ 4321 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4322 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4323 4324 /* post the Irecv of j-structure */ 4325 /*-------------------------------*/ 4326 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4327 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4328 4329 /* post the Isend of j-structure */ 4330 /*--------------------------------*/ 4331 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4332 4333 for (proc=0, k=0; proc<size; proc++){ 4334 if (!len_s[proc]) continue; 4335 i = owners[proc]; 4336 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4337 k++; 4338 } 4339 4340 /* receives and sends of j-structure are complete */ 4341 /*------------------------------------------------*/ 4342 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4343 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4344 4345 /* send and recv i-structure */ 4346 /*---------------------------*/ 4347 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4348 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4349 4350 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4351 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4352 for (proc=0,k=0; proc<size; proc++){ 4353 if (!len_s[proc]) continue; 4354 /* form outgoing message for i-structure: 4355 buf_si[0]: nrows to be sent 4356 [1:nrows]: row index (global) 4357 [nrows+1:2*nrows+1]: i-structure index 4358 */ 4359 /*-------------------------------------------*/ 4360 nrows = len_si[proc]/2 - 1; 4361 buf_si_i = buf_si + nrows+1; 4362 buf_si[0] = nrows; 4363 buf_si_i[0] = 0; 4364 nrows = 0; 4365 for (i=owners[proc]; i<owners[proc+1]; i++){ 4366 anzi = ai[i+1] - ai[i]; 4367 if (anzi) { 4368 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4369 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4370 nrows++; 4371 } 4372 } 4373 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4374 k++; 4375 buf_si += len_si[proc]; 4376 } 4377 4378 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4379 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4380 4381 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4382 for (i=0; i<merge->nrecv; i++){ 4383 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4384 } 4385 4386 ierr = PetscFree(len_si);CHKERRQ(ierr); 4387 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4388 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4389 ierr = 
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4390 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4391 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4392 ierr = PetscFree(status);CHKERRQ(ierr); 4393 4394 /* compute a local seq matrix in each processor */ 4395 /*----------------------------------------------*/ 4396 /* allocate bi array and free space for accumulating nonzero column info */ 4397 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4398 bi[0] = 0; 4399 4400 /* create and initialize a linked list */ 4401 nlnk = N+1; 4402 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4403 4404 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4405 len = 0; 4406 len = ai[owners[rank+1]] - ai[owners[rank]]; 4407 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4408 current_space = free_space; 4409 4410 /* determine symbolic info for each local row */ 4411 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4412 4413 for (k=0; k<merge->nrecv; k++){ 4414 buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */ 4415 nrows = *buf_ri_k[k]; 4416 nextrow[k] = buf_ri_k[k] + 1; /* next row number of the k-th received i-structure */ 4417 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */ 4418 } 4419 4420 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4421 len = 0; 4422 for (i=0;i<m;i++) { 4423 bnzi = 0; 4424 /* add local non-zero cols of this proc's seqmat into lnk */ 4425 arow = owners[rank] + i; 4426 anzi = ai[arow+1] - ai[arow]; 4427 aj = a->j + ai[arow]; 4428 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4429 bnzi += nlnk; 4430 /* add received col data into lnk */ 4431 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4432 if (i == *nextrow[k]) { /* i-th row */ 4433 anzi = *(nextai[k]+1) - *nextai[k]; 4434 aj = buf_rj[k] + *nextai[k]; 4435 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4436 bnzi += nlnk; 4437 nextrow[k]++; nextai[k]++; 4438 } 4439 } 4440 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4441 4442 /* if free space is not available, make more free space */ 4443 if (current_space->local_remaining<bnzi) { 4444 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4445 nspacedouble++; 4446 } 4447 /* copy data into free space, then initialize lnk */ 4448 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4449 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4450 4451 current_space->array += bnzi; 4452 current_space->local_used += bnzi; 4453 current_space->local_remaining -= bnzi; 4454 4455 bi[i+1] = bi[i] + bnzi; 4456 } 4457 4458 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4459 4460 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4461 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4462 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4463 4464 /* create symbolic parallel matrix B_mpi */ 4465 /*---------------------------------------*/ 4466 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4467 if (n==PETSC_DECIDE) { 4468 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4469 } else { 4470 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4471 } 4472 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4473 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4474 ierr =
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4475 4476 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4477 B_mpi->assembled = PETSC_FALSE; 4478 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4479 merge->bi = bi; 4480 merge->bj = bj; 4481 merge->buf_ri = buf_ri; 4482 merge->buf_rj = buf_rj; 4483 merge->coi = PETSC_NULL; 4484 merge->coj = PETSC_NULL; 4485 merge->owners_co = PETSC_NULL; 4486 4487 /* attach the supporting struct to B_mpi for reuse */ 4488 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4489 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4490 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4491 *mpimat = B_mpi; 4492 4493 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4494 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4495 PetscFunctionReturn(0); 4496 } 4497 4498 #undef __FUNCT__ 4499 #define __FUNCT__ "MatMerge_SeqsToMPI" 4500 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4501 { 4502 PetscErrorCode ierr; 4503 4504 PetscFunctionBegin; 4505 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4506 if (scall == MAT_INITIAL_MATRIX){ 4507 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4508 } 4509 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4510 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4511 PetscFunctionReturn(0); 4512 } 4513 4514 #undef __FUNCT__ 4515 #define __FUNCT__ "MatGetLocalMat" 4516 /*@ 4517 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4518 4519 Not Collective 4520 4521 Input Parameters: 4522 + A - the matrix 4523 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4524 4525 Output Parameter: 4526 . 
A_loc - the local sequential matrix generated 4527 4528 Level: developer 4529 4530 @*/ 4531 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4532 { 4533 PetscErrorCode ierr; 4534 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4535 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4536 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4537 MatScalar *aa=a->a,*ba=b->a,*cam; 4538 PetscScalar *ca; 4539 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4540 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4541 4542 PetscFunctionBegin; 4543 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4544 if (scall == MAT_INITIAL_MATRIX){ 4545 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4546 ci[0] = 0; 4547 for (i=0; i<am; i++){ 4548 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4549 } 4550 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4551 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4552 k = 0; 4553 for (i=0; i<am; i++) { 4554 ncols_o = bi[i+1] - bi[i]; 4555 ncols_d = ai[i+1] - ai[i]; 4556 /* off-diagonal portion of A */ 4557 for (jo=0; jo<ncols_o; jo++) { 4558 col = cmap[*bj]; 4559 if (col >= cstart) break; 4560 cj[k] = col; bj++; 4561 ca[k++] = *ba++; 4562 } 4563 /* diagonal portion of A */ 4564 for (j=0; j<ncols_d; j++) { 4565 cj[k] = cstart + *aj++; 4566 ca[k++] = *aa++; 4567 } 4568 /* off-diagonal portion of A */ 4569 for (j=jo; j<ncols_o; j++) { 4570 cj[k] = cmap[*bj++]; 4571 ca[k++] = *ba++; 4572 } 4573 } 4574 /* put together the new matrix */ 4575 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4576 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4577 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4578 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4579 mat->free_a = PETSC_TRUE; 4580 mat->free_ij = PETSC_TRUE; 4581 mat->nonew = 0; 4582 } else if (scall == MAT_REUSE_MATRIX){ 4583 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4584 ci = mat->i; cj = mat->j; cam = mat->a; 4585 for (i=0; i<am; i++) { 4586 /* off-diagonal portion of A */ 4587 ncols_o = bi[i+1] - bi[i]; 4588 for (jo=0; jo<ncols_o; jo++) { 4589 col = cmap[*bj]; 4590 if (col >= cstart) break; 4591 *cam++ = *ba++; bj++; 4592 } 4593 /* diagonal portion of A */ 4594 ncols_d = ai[i+1] - ai[i]; 4595 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4596 /* off-diagonal portion of A */ 4597 for (j=jo; j<ncols_o; j++) { 4598 *cam++ = *ba++; bj++; 4599 } 4600 } 4601 } else { 4602 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4603 } 4604 4605 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4606 PetscFunctionReturn(0); 4607 } 4608 4609 #undef __FUNCT__ 4610 #define __FUNCT__ "MatGetLocalMatCondensed" 4611 /*@C 4612 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4613 4614 Not Collective 4615 4616 Input Parameters: 4617 + A - the matrix 4618 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4619 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4620 4621 Output Parameter: 4622 . 
A_loc - the local sequential matrix generated 4623 4624 Level: developer 4625 4626 @*/ 4627 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4628 { 4629 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4630 PetscErrorCode ierr; 4631 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4632 IS isrowa,iscola; 4633 Mat *aloc; 4634 4635 PetscFunctionBegin; 4636 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4637 if (!row){ 4638 start = A->rmap->rstart; end = A->rmap->rend; 4639 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4640 } else { 4641 isrowa = *row; 4642 } 4643 if (!col){ 4644 start = A->cmap->rstart; 4645 cmap = a->garray; 4646 nzA = a->A->cmap->n; 4647 nzB = a->B->cmap->n; 4648 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4649 ncols = 0; 4650 for (i=0; i<nzB; i++) { 4651 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4652 else break; 4653 } 4654 imark = i; 4655 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4656 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4657 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4658 } else { 4659 iscola = *col; 4660 } 4661 if (scall != MAT_INITIAL_MATRIX){ 4662 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4663 aloc[0] = *A_loc; 4664 } 4665 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4666 *A_loc = aloc[0]; 4667 ierr = PetscFree(aloc);CHKERRQ(ierr); 4668 if (!row){ 4669 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4670 } 4671 if (!col){ 4672 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4673 } 4674 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4675 PetscFunctionReturn(0); 4676 } 4677 4678 #undef __FUNCT__ 4679 #define __FUNCT__ "MatGetBrowsOfAcols" 4680 /*@C 4681 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4682 4683 Collective on Mat 4684 4685 Input Parameters: 4686 + A,B - the matrices in mpiaij format 4687 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4688 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4689 4690 Output Parameter: 4691 + rowb, colb - index sets of rows and columns of B to extract 4692 . 
brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4693 - B_seq - the sequential matrix generated 4694 4695 Level: developer 4696 4697 @*/ 4698 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4699 { 4700 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4701 PetscErrorCode ierr; 4702 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4703 IS isrowb,iscolb; 4704 Mat *bseq; 4705 4706 PetscFunctionBegin; 4707 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4708 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4709 } 4710 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4711 4712 if (scall == MAT_INITIAL_MATRIX){ 4713 start = A->cmap->rstart; 4714 cmap = a->garray; 4715 nzA = a->A->cmap->n; 4716 nzB = a->B->cmap->n; 4717 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4718 ncols = 0; 4719 for (i=0; i<nzB; i++) { /* row < local row index */ 4720 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4721 else break; 4722 } 4723 imark = i; 4724 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4725 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4726 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4727 *brstart = imark; 4728 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4729 } else { 4730 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4731 isrowb = *rowb; iscolb = *colb; 4732 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4733 bseq[0] = *B_seq; 4734 } 4735 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4736 *B_seq = bseq[0]; 4737 ierr = PetscFree(bseq);CHKERRQ(ierr); 4738 if (!rowb){ 4739 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4740 } else { 4741 *rowb = isrowb; 4742 } 4743 if (!colb){ 4744 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4745 } else { 4746 *colb = iscolb; 4747 } 4748 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4749 PetscFunctionReturn(0); 4750 } 4751 4752 #undef __FUNCT__ 4753 #define __FUNCT__ "MatGetBrowsOfAoCols" 4754 /*@C 4755 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4756 of the OFF-DIAGONAL portion of local A 4757 4758 Collective on Mat 4759 4760 Input Parameters: 4761 + A,B - the matrices in mpiaij format 4762 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4763 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4764 . 
startsj_r - similar to startsj for receives 4765 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4766 4767 Output Parameter: 4768 + B_oth - the sequential matrix generated 4769 4770 Level: developer 4771 4772 @*/ 4773 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4774 { 4775 VecScatter_MPI_General *gen_to,*gen_from; 4776 PetscErrorCode ierr; 4777 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4778 Mat_SeqAIJ *b_oth; 4779 VecScatter ctx=a->Mvctx; 4780 MPI_Comm comm=((PetscObject)ctx)->comm; 4781 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4782 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4783 PetscScalar *rvalues,*svalues; 4784 MatScalar *b_otha,*bufa,*bufA; 4785 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4786 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4787 MPI_Status *sstatus,rstatus; 4788 PetscMPIInt jj; 4789 PetscInt *cols,sbs,rbs; 4790 PetscScalar *vals; 4791 4792 PetscFunctionBegin; 4793 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4794 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4795 } 4796 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4797 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4798 4799 gen_to = (VecScatter_MPI_General*)ctx->todata; 4800 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4801 rvalues = gen_from->values; /* holds the length of receiving row */ 4802 svalues = gen_to->values; /* holds the length of sending row */ 4803 nrecvs = gen_from->n; 4804 nsends = gen_to->n; 4805 4806 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4807 srow = gen_to->indices; /* local row index to be sent */ 4808 sstarts = gen_to->starts; 4809 sprocs = gen_to->procs; 4810 sstatus = gen_to->sstatus; 4811 sbs = gen_to->bs; 4812 rstarts = gen_from->starts; 4813 rprocs = gen_from->procs; 4814 rbs = gen_from->bs; 4815 4816 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4817 if (scall == MAT_INITIAL_MATRIX){ 4818 /* i-array */ 4819 /*---------*/ 4820 /* post receives */ 4821 for (i=0; i<nrecvs; i++){ 4822 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4823 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4824 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4825 } 4826 4827 /* pack the outgoing message */ 4828 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4829 sstartsj[0] = 0; rstartsj[0] = 0; 4830 len = 0; /* total length of j or a array to be sent */ 4831 k = 0; 4832 for (i=0; i<nsends; i++){ 4833 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4834 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4835 for (j=0; j<nrows; j++) { 4836 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4837 for (l=0; l<sbs; l++){ 4838 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4839 rowlen[j*sbs+l] = ncols; 4840 len += ncols; 4841 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4842 } 4843 k++; 4844 } 4845 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4846 sstartsj[i+1] = len; /* starting point of (i+1)-th 
outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4941 } 4942 } 4943 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4944 } 4945 /* recvs and sends of a-array are completed */ 4946 i = nrecvs; 4947 while (i--) { 4948 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4949 } 4950 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4951 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4952 4953 if (scall == MAT_INITIAL_MATRIX){ 4954 /* put together the new matrix */ 4955 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4956 4957 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4958 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4959 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4960 b_oth->free_a = PETSC_TRUE; 4961 b_oth->free_ij = PETSC_TRUE; 4962 b_oth->nonew = 0; 4963 4964 ierr = PetscFree(bufj);CHKERRQ(ierr); 4965 if (!startsj || !bufa_ptr){ 4966 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4967 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4968 } else { 4969 *startsj = sstartsj; 4970 *startsj_r = rstartsj; 4971 *bufa_ptr = bufa; 4972 } 4973 } 4974 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4975 PetscFunctionReturn(0); 4976 } 4977 4978 #undef __FUNCT__ 4979 #define __FUNCT__ "MatGetCommunicationStructs" 4980 /*@C 4981 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4982 4983 Not Collective 4984 4985 Input Parameters: 4986 . A - The matrix in mpiaij format 4987 4988 Output Parameter: 4989 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4990 . 
colmap - A map from global column index to local index into lvec 4991 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4992 4993 Level: developer 4994 4995 @*/ 4996 #if defined (PETSC_USE_CTABLE) 4997 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4998 #else 4999 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5000 #endif 5001 { 5002 Mat_MPIAIJ *a; 5003 5004 PetscFunctionBegin; 5005 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5006 PetscValidPointer(lvec, 2); 5007 PetscValidPointer(colmap, 3); 5008 PetscValidPointer(multScatter, 4); 5009 a = (Mat_MPIAIJ *) A->data; 5010 if (lvec) *lvec = a->lvec; 5011 if (colmap) *colmap = a->colmap; 5012 if (multScatter) *multScatter = a->Mvctx; 5013 PetscFunctionReturn(0); 5014 } 5015 5016 EXTERN_C_BEGIN 5017 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5018 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5019 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5020 EXTERN_C_END 5021 5022 #undef __FUNCT__ 5023 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5024 /* 5025 Computes (B'*A')' since computing B*A directly is untenable 5026 5027 n p p 5028 ( ) ( ) ( ) 5029 m ( A ) * n ( B ) = m ( C ) 5030 ( ) ( ) ( ) 5031 5032 */ 5033 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5034 { 5035 PetscErrorCode ierr; 5036 Mat At,Bt,Ct; 5037 5038 PetscFunctionBegin; 5039 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5040 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5041 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5042 ierr = MatDestroy(At);CHKERRQ(ierr); 5043 ierr = MatDestroy(Bt);CHKERRQ(ierr); 5044 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5045 ierr = MatDestroy(Ct);CHKERRQ(ierr); 5046 PetscFunctionReturn(0); 5047 } 5048 5049 #undef __FUNCT__ 5050 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5051 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5052 { 5053 PetscErrorCode ierr; 5054 PetscInt m=A->rmap->n,n=B->cmap->n; 5055 Mat Cmat; 5056 5057 PetscFunctionBegin; 5058 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5059 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5060 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5061 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5062 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5063 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5064 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5065 *C = Cmat; 5066 PetscFunctionReturn(0); 5067 } 5068 5069 /* ----------------------------------------------------------------*/ 5070 #undef __FUNCT__ 5071 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5072 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5073 { 5074 PetscErrorCode ierr; 5075 5076 PetscFunctionBegin; 5077 if (scall == MAT_INITIAL_MATRIX){ 5078 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5079 } 5080 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5081 PetscFunctionReturn(0); 5082 } 
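/*
    Illustrative usage sketch (added by the editor; not part of the original source):
    how the MPIDense*MPIAIJ product path above might be driven from application code.
    The local sizes and nonzero-per-row guesses are hypothetical; only standard PETSc
    calls (MatCreateMPIDense(), MatCreateMPIAIJ(), MatMatMult()) are used.  MatMatMult()
    is expected to dispatch to MatMatMult_MPIDense_MPIAIJ(), which forms C = A*B through
    the transposed product (B'*A')' as described in the comment above.

      Mat      A,B,C;
      PetscInt m = 10,k = 10,n = 10;   // local sizes; A's local column size must match B's local row size

      ierr = MatCreateMPIDense(comm,m,k,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatCreateMPIAIJ(comm,k,n,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,5,PETSC_NULL,&B);CHKERRQ(ierr);
      // ... fill A and B with MatSetValues() and assemble both matrices ...
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      // after changing the entries of A or B (same pattern), the product can be
      // recomputed by calling MatMatMult() again with scall = MAT_REUSE_MATRIX
*/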
5083 5084 EXTERN_C_BEGIN 5085 #if defined(PETSC_HAVE_MUMPS) 5086 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5087 #endif 5088 #if defined(PETSC_HAVE_PASTIX) 5089 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5090 #endif 5091 #if defined(PETSC_HAVE_SUPERLU_DIST) 5092 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5093 #endif 5094 #if defined(PETSC_HAVE_SPOOLES) 5095 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5096 #endif 5097 EXTERN_C_END 5098 5099 /*MC 5100 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5101 5102 Options Database Keys: 5103 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5104 5105 Level: beginner 5106 5107 .seealso: MatCreateMPIAIJ() 5108 M*/ 5109 5110 EXTERN_C_BEGIN 5111 #undef __FUNCT__ 5112 #define __FUNCT__ "MatCreate_MPIAIJ" 5113 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5114 { 5115 Mat_MPIAIJ *b; 5116 PetscErrorCode ierr; 5117 PetscMPIInt size; 5118 5119 PetscFunctionBegin; 5120 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5121 5122 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5123 B->data = (void*)b; 5124 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5125 B->rmap->bs = 1; 5126 B->assembled = PETSC_FALSE; 5127 B->mapping = 0; 5128 5129 B->insertmode = NOT_SET_VALUES; 5130 b->size = size; 5131 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5132 5133 /* build cache for off array entries formed */ 5134 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5135 b->donotstash = PETSC_FALSE; 5136 b->colmap = 0; 5137 b->garray = 0; 5138 b->roworiented = PETSC_TRUE; 5139 5140 /* stuff used for matrix vector multiply */ 5141 b->lvec = PETSC_NULL; 5142 b->Mvctx = PETSC_NULL; 5143 5144 /* stuff for MatGetRow() */ 5145 b->rowindices = 0; 5146 b->rowvalues = 0; 5147 b->getrowactive = PETSC_FALSE; 5148 5149 #if defined(PETSC_HAVE_SPOOLES) 5150 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5151 "MatGetFactor_mpiaij_spooles", 5152 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5153 #endif 5154 #if defined(PETSC_HAVE_MUMPS) 5155 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5156 "MatGetFactor_aij_mumps", 5157 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5158 #endif 5159 #if defined(PETSC_HAVE_PASTIX) 5160 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5161 "MatGetFactor_mpiaij_pastix", 5162 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5163 #endif 5164 #if defined(PETSC_HAVE_SUPERLU_DIST) 5165 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5166 "MatGetFactor_mpiaij_superlu_dist", 5167 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5168 #endif 5169 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5170 "MatStoreValues_MPIAIJ", 5171 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5172 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5173 "MatRetrieveValues_MPIAIJ", 5174 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5175 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5176 "MatGetDiagonalBlock_MPIAIJ", 5177 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5178 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5179 "MatIsTranspose_MPIAIJ", 
5180 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5181 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5182 "MatMPIAIJSetPreallocation_MPIAIJ", 5183 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5184 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5185 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5186 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5187 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5188 "MatDiagonalScaleLocal_MPIAIJ", 5189 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5190 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5191 "MatConvert_MPIAIJ_MPIAIJPERM", 5192 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5193 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5194 "MatConvert_MPIAIJ_MPIAIJCRL", 5195 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5196 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5197 "MatConvert_MPIAIJ_MPISBAIJ", 5198 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5199 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5200 "MatMatMult_MPIDense_MPIAIJ", 5201 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5202 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5203 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5204 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5205 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5206 "MatMatMultNumeric_MPIDense_MPIAIJ", 5207 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5208 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5209 PetscFunctionReturn(0); 5210 } 5211 EXTERN_C_END 5212 5213 #undef __FUNCT__ 5214 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5215 /*@ 5216 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5217 and "off-diagonal" part of the matrix in CSR format. 5218 5219 Collective on MPI_Comm 5220 5221 Input Parameters: 5222 + comm - MPI communicator 5223 . m - number of local rows (Cannot be PETSC_DECIDE) 5224 . n - This value should be the same as the local size used in creating the 5225 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5226 calculated if N is given) For square matrices n is almost always m. 5227 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5228 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5229 . i - row indices for "diagonal" portion of matrix 5230 . j - column indices 5231 . a - matrix values 5232 . oi - row indices for "off-diagonal" portion of matrix 5233 . oj - column indices 5234 - oa - matrix values 5235 5236 Output Parameter: 5237 . mat - the matrix 5238 5239 Level: advanced 5240 5241 Notes: 5242 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5243 5244 The i and j indices are 0 based 5245 5246 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5247 5248 This sets local rows and cannot be used to set off-processor values. 5249 5250 You cannot later use MatSetValues() to change values in this matrix. 
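
   Example usage (an illustrative sketch with made-up sizes and values, not taken from an
   existing PETSc example; the diagonal-block column indices j are local to that block while
   the off-diagonal indices oj are global, as implied by the two MatCreateSeqAIJWithArrays()
   calls in the implementation below):
.vb
   /* rank 0 of 2 owns rows 0-1 and columns 0-1 of a 4x4 matrix; rank 1 passes the
      analogous arrays for rows 2-3.  The arrays must remain valid for the life of A. */
   PetscInt    i[]  = {0,2,3}, j[]  = {0,1,1};   PetscScalar a[]  = {1.0,2.0,4.0}; /* diagonal block  */
   PetscInt    oi[] = {0,1,2}, oj[] = {3,2};     PetscScalar oa[] = {3.0,5.0};     /* off-diagonal    */
   Mat         A;

   ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve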
5251 5252 .keywords: matrix, aij, compressed row, sparse, parallel 5253 5254 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5255 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5256 @*/ 5257 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5258 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5259 { 5260 PetscErrorCode ierr; 5261 Mat_MPIAIJ *maij; 5262 5263 PetscFunctionBegin; 5264 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5265 if (i[0]) { 5266 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5267 } 5268 if (oi[0]) { 5269 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5270 } 5271 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5272 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5273 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5274 maij = (Mat_MPIAIJ*) (*mat)->data; 5275 maij->donotstash = PETSC_TRUE; 5276 (*mat)->preallocated = PETSC_TRUE; 5277 5278 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5279 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5280 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5281 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5282 5283 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5284 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5285 5286 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5287 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5288 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5289 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5290 5291 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5292 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5293 PetscFunctionReturn(0); 5294 } 5295 5296 /* 5297 Special version for direct calls from Fortran 5298 */ 5299 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5300 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5301 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5302 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5303 #endif 5304 5305 /* Change these macros so can be used in void function */ 5306 #undef CHKERRQ 5307 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5308 #undef SETERRQ2 5309 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5310 #undef SETERRQ 5311 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5312 5313 EXTERN_C_BEGIN 5314 #undef __FUNCT__ 5315 #define __FUNCT__ "matsetvaluesmpiaij_" 5316 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5317 { 5318 Mat mat = *mmat; 5319 PetscInt m = *mm, n = *mn; 5320 InsertMode addv = *maddv; 5321 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5322 PetscScalar value; 5323 PetscErrorCode ierr; 5324 5325 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5326 if (mat->insertmode == NOT_SET_VALUES) { 5327 mat->insertmode = addv; 5328 } 5329 #if defined(PETSC_USE_DEBUG) 5330 else if (mat->insertmode != addv) { 5331 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5332 } 5333 #endif 5334 { 5335 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5336 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5337 PetscBool roworiented = aij->roworiented; 5338 5339 /* Some Variables required in the macro */ 5340 Mat A = aij->A; 5341 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5342 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5343 MatScalar *aa = a->a; 5344 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5345 Mat B = aij->B; 5346 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5347 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5348 MatScalar *ba = b->a; 5349 5350 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5351 PetscInt nonew = a->nonew; 5352 MatScalar *ap1,*ap2; 5353 5354 PetscFunctionBegin; 5355 for (i=0; i<m; i++) { 5356 if (im[i] < 0) continue; 5357 #if defined(PETSC_USE_DEBUG) 5358 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5359 #endif 5360 if (im[i] >= rstart && im[i] < rend) { 5361 row = im[i] - rstart; 5362 lastcol1 = -1; 5363 rp1 = aj + ai[row]; 5364 ap1 = aa + ai[row]; 5365 rmax1 = aimax[row]; 5366 nrow1 = ailen[row]; 5367 low1 = 0; 5368 high1 = nrow1; 5369 lastcol2 = -1; 5370 rp2 = bj + bi[row]; 5371 ap2 = ba + bi[row]; 5372 rmax2 = bimax[row]; 5373 nrow2 = bilen[row]; 5374 low2 = 0; 5375 high2 = nrow2; 5376 5377 for (j=0; j<n; j++) { 5378 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5379 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5380 if (in[j] >= cstart && in[j] < cend){ 5381 col = in[j] - cstart; 5382 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5383 } else if (in[j] < 0) continue; 5384 #if defined(PETSC_USE_DEBUG) 5385 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5386 #endif 5387 else { 5388 if (mat->was_assembled) { 5389 if (!aij->colmap) { 5390 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5391 } 5392 #if defined (PETSC_USE_CTABLE) 5393 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5394 col--; 5395 #else 5396 col = aij->colmap[in[j]] - 1; 5397 #endif 5398 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5399 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5400 col = in[j]; 5401 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5402 B = aij->B; 5403 b = (Mat_SeqAIJ*)B->data; 5404 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5405 rp2 = bj + bi[row]; 5406 ap2 = ba + bi[row]; 5407 rmax2 = bimax[row]; 5408 nrow2 = bilen[row]; 5409 low2 = 0; 5410 high2 = nrow2; 5411 bm = aij->B->rmap->n; 5412 ba = b->a; 5413 } 5414 } else col = in[j]; 5415 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5416 } 5417 } 5418 } else { 5419 if (!aij->donotstash) { 5420 if (roworiented) { 5421 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5422 } else { 5423 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5424 } 5425 } 5426 } 5427 }} 5428 PetscFunctionReturnVoid(); 5429 } 5430 EXTERN_C_END 5431 5432
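
/*
    Illustrative usage sketch (added by the editor; not part of the original source):
    the insertion path implemented by matsetvaluesmpiaij_()/MatSetValues_MPIAIJ() above.
    Entries whose global row is owned locally go directly into the diagonal (A) or
    off-diagonal (B) SeqAIJ part; rows owned by another process are stashed and only
    communicated during assembly.  Sizes and values below are hypothetical.

      Mat         A;
      PetscInt    row,col;
      PetscScalar v = 1.0;

      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,2,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
      row  = 0; col = 3;   // an off-diagonal entry; on ranks that do not own row 0 it is stashed
      ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   // sends stashed off-process rows
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);     // receives them and builds the scatter
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/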