#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

  Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
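/*
   Editorial sketch (not part of the original source): one plausible calling
   sequence for MatDistribute_MPIAIJ(), assuming the matrix is square, m is this
   process's local row count, and gmat is a SeqAIJ matrix that only needs to be
   valid on rank 0 (it appears to be dereferenced only there). The names gseq,
   mlocal, and dmat are hypothetical.

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ... use dmat ...
     (later, if only the numerical values of gseq change on rank 0, the values
      can be moved over again without rebuilding the parallel structure)
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/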
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value;  \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
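/*
   Editorial note (not part of the original source): the two macros above are the
   per-row insertion kernels used by MatSetValues_MPIAIJ() below, one for the
   diagonal block A and one for the off-diagonal block B. Each narrows the search
   range over the sorted column indices rp1/rp2 by bisection, then scans; if the
   column is already present the value is overwritten or added, otherwise the tail
   of the row is shifted up one slot and the new entry is inserted, e.g.

     rp1 = {1,4,9}, insert col 6  ->  rp1 = {1,4,6,9}

   MatSeqXAIJReallocateAIJ() first grows the row storage if the preallocated
   space (rmax1/rmax2) is exhausted.
*/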
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr =
PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 368 col--; 369 #else 370 col = aij->colmap[in[j]] - 1; 371 #endif 372 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 373 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 374 col = in[j]; 375 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 376 B = aij->B; 377 b = (Mat_SeqAIJ*)B->data; 378 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 379 rp2 = bj + bi[row]; 380 ap2 = ba + bi[row]; 381 rmax2 = bimax[row]; 382 nrow2 = bilen[row]; 383 low2 = 0; 384 high2 = nrow2; 385 bm = aij->B->rmap->n; 386 ba = b->a; 387 } 388 } else col = in[j]; 389 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 390 } 391 } 392 } else { 393 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 394 if (!aij->donotstash) { 395 if (roworiented) { 396 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool )(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 397 } else { 398 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool )(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 399 } 400 } 401 } 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatGetValues_MPIAIJ" 408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 413 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 414 415 PetscFunctionBegin; 416 for (i=0; i<m; i++) { 417 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 418 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 419 if (idxm[i] >= rstart && idxm[i] < rend) { 420 row = idxm[i] - rstart; 421 for (j=0; j<n; j++) { 422 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 423 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 424 if (idxn[j] >= cstart && idxn[j] < cend){ 425 col = idxn[j] - cstart; 426 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 427 } else { 428 if (!aij->colmap) { 429 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 430 } 431 #if defined (PETSC_USE_CTABLE) 432 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 433 col --; 434 #else 435 col = aij->colmap[idxn[j]] - 1; 436 #endif 437 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 438 else { 439 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 440 } 441 } 442 } 443 } else { 444 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 445 } 446 } 447 PetscFunctionReturn(0); 448 } 449 450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash || mat->nooffprocentries) { 463 
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 516 /* 517 if nonzero structure of submatrix B cannot change then we know that 518 no processor disassembled thus we can skip this stuff 519 */ 520 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 521 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 522 if (mat->was_assembled && !other_disassembled) { 523 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 524 } 525 } 526 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 527 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 528 } 529 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 530 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 531 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 532 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 533 534 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 535 aij->rowvalues = 0; 536 537 /* used by MatAXPY() */ 538 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 539 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 540 541 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 542 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 549 { 550 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 555 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatZeroRows_MPIAIJ" 561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 562 { 563 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 564 PetscErrorCode ierr; 565 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 566 PetscInt i,*owners = A->rmap->range; 567 PetscInt *nprocs,j,idx,nsends,row; 568 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 569 PetscInt *rvalues,count,base,slen,*source; 570 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 571 MPI_Comm comm = ((PetscObject)A)->comm; 572 MPI_Request *send_waits,*recv_waits; 573 MPI_Status recv_status,*send_status; 574 #if defined(PETSC_DEBUG) 575 PetscBool found = PETSC_FALSE; 576 #endif 577 578 PetscFunctionBegin; 579 /* first count number of contributors to each processor */ 580 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 581 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 582 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 583 j = 0; 584 for (i=0; i<N; i++) { 585 if (lastidx > (idx = rows[i])) j = 0; 586 lastidx = idx; 587 for (; j<size; j++) { 588 if (idx >= owners[j] && idx < owners[j+1]) { 589 nprocs[2*j]++; 590 nprocs[2*j+1] = 1; 591 owner[i] = j; 592 #if defined(PETSC_DEBUG) 593 found = PETSC_TRUE; 594 #endif 595 break; 596 } 597 } 598 #if defined(PETSC_DEBUG) 599 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 600 found = PETSC_FALSE; 601 #endif 602 } 603 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 604 605 if (A->nooffproczerorows) { 606 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 607 nrecvs = nsends; 608 nmax = N; 609 } else { 610 /* inform 
other processors of number of messages and max length*/ 611 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 612 } 613 614 /* post receives: */ 615 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 616 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 617 for (i=0; i<nrecvs; i++) { 618 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 619 } 620 621 /* do sends: 622 1) starts[i] gives the starting index in svalues for stuff going to 623 the ith processor 624 */ 625 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 626 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 627 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 628 starts[0] = 0; 629 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 630 for (i=0; i<N; i++) { 631 svalues[starts[owner[i]]++] = rows[i]; 632 } 633 634 starts[0] = 0; 635 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 636 count = 0; 637 for (i=0; i<size; i++) { 638 if (nprocs[2*i+1]) { 639 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 640 } 641 } 642 ierr = PetscFree(starts);CHKERRQ(ierr); 643 644 base = owners[rank]; 645 646 /* wait on receives */ 647 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 648 count = nrecvs; slen = 0; 649 while (count) { 650 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 651 /* unpack receives into our local space */ 652 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 653 source[imdex] = recv_status.MPI_SOURCE; 654 lens[imdex] = n; 655 slen += n; 656 count--; 657 } 658 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 659 660 /* move the data into the send scatter */ 661 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 662 count = 0; 663 for (i=0; i<nrecvs; i++) { 664 values = rvalues + i*nmax; 665 for (j=0; j<lens[i]; j++) { 666 lrows[count++] = values[j] - base; 667 } 668 } 669 ierr = PetscFree(rvalues);CHKERRQ(ierr); 670 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 671 ierr = PetscFree(owner);CHKERRQ(ierr); 672 ierr = PetscFree(nprocs);CHKERRQ(ierr); 673 674 /* actually zap the local rows */ 675 /* 676 Zero the required rows. If the "diagonal block" of the matrix 677 is square and the user wishes to set the diagonal we use separate 678 code so that MatSetValues() is not called for each diagonal allocating 679 new memory, thus calling lots of mallocs and slowing things down. 
680 681 */ 682 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 683 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 684 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 685 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 686 } else if (diag != 0.0) { 687 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 688 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 689 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 690 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 691 } 692 for (i = 0; i < slen; i++) { 693 row = lrows[i] + rstart; 694 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 695 } 696 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 697 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 698 } else { 699 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 700 } 701 ierr = PetscFree(lrows);CHKERRQ(ierr); 702 703 /* wait on sends */ 704 if (nsends) { 705 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 706 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 707 ierr = PetscFree(send_status);CHKERRQ(ierr); 708 } 709 ierr = PetscFree(send_waits);CHKERRQ(ierr); 710 ierr = PetscFree(svalues);CHKERRQ(ierr); 711 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatMult_MPIAIJ" 717 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 718 { 719 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 720 PetscErrorCode ierr; 721 PetscInt nt; 722 723 PetscFunctionBegin; 724 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 725 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 726 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 728 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 729 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 730 PetscFunctionReturn(0); 731 } 732 733 #undef __FUNCT__ 734 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 735 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 736 { 737 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 738 PetscErrorCode ierr; 739 740 PetscFunctionBegin; 741 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 742 PetscFunctionReturn(0); 743 } 744 745 #undef __FUNCT__ 746 #define __FUNCT__ "MatMultAdd_MPIAIJ" 747 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 748 { 749 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 750 PetscErrorCode ierr; 751 752 PetscFunctionBegin; 753 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 754 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 755 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 756 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 757 PetscFunctionReturn(0); 758 } 759 760 #undef __FUNCT__ 761 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 762 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 763 { 764 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 765 PetscErrorCode ierr; 766 PetscBool merged; 767 768 PetscFunctionBegin; 769 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 770 /* do nondiagonal part */ 771 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 772 if (!merged) { 773 /* send it on its 
way */ 774 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 775 /* do local part */ 776 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 777 /* receive remote parts: note this assumes the values are not actually */ 778 /* added in yy until the next line, */ 779 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 780 } else { 781 /* do local part */ 782 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 783 /* send it on its way */ 784 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 785 /* values actually were received in the Begin() but we need to call this nop */ 786 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 787 } 788 PetscFunctionReturn(0); 789 } 790 791 EXTERN_C_BEGIN 792 #undef __FUNCT__ 793 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 794 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 795 { 796 MPI_Comm comm; 797 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 798 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 799 IS Me,Notme; 800 PetscErrorCode ierr; 801 PetscInt M,N,first,last,*notme,i; 802 PetscMPIInt size; 803 804 PetscFunctionBegin; 805 806 /* Easy test: symmetric diagonal block */ 807 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 808 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 809 if (!*f) PetscFunctionReturn(0); 810 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 811 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 812 if (size == 1) PetscFunctionReturn(0); 813 814 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 815 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 816 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 817 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 818 for (i=0; i<first; i++) notme[i] = i; 819 for (i=last; i<M; i++) notme[i-last+first] = i; 820 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 821 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 822 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 823 Aoff = Aoffs[0]; 824 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 825 Boff = Boffs[0]; 826 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 827 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 828 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 829 ierr = ISDestroy(Me);CHKERRQ(ierr); 830 ierr = ISDestroy(Notme);CHKERRQ(ierr); 831 832 PetscFunctionReturn(0); 833 } 834 EXTERN_C_END 835 836 #undef __FUNCT__ 837 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 838 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 839 { 840 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 841 PetscErrorCode ierr; 842 843 PetscFunctionBegin; 844 /* do nondiagonal part */ 845 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 846 /* send it on its way */ 847 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 848 /* do local part */ 849 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 850 /* receive remote parts */ 851 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 852 PetscFunctionReturn(0); 853 } 854 855 /* 856 This only works correctly for square matrices where the subblock A->A is the 857 diagonal block 858 */ 859 #undef 
__FUNCT__ 860 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 861 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 862 { 863 PetscErrorCode ierr; 864 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 865 866 PetscFunctionBegin; 867 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 868 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 869 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "MatScale_MPIAIJ" 875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 876 { 877 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 878 PetscErrorCode ierr; 879 880 PetscFunctionBegin; 881 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 882 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatDestroy_MPIAIJ" 888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 889 { 890 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 891 PetscErrorCode ierr; 892 893 PetscFunctionBegin; 894 #if defined(PETSC_USE_LOG) 895 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 896 #endif 897 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 898 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 899 if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);} 900 if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);} 901 #if defined (PETSC_USE_CTABLE) 902 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 903 #else 904 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 905 #endif 906 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 907 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 908 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 909 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 910 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 911 ierr = PetscFree(aij);CHKERRQ(ierr); 912 913 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 914 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 915 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 916 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 917 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 918 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 919 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 920 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 921 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 922 PetscFunctionReturn(0); 923 } 924 925 #undef __FUNCT__ 926 #define __FUNCT__ "MatView_MPIAIJ_Binary" 927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 928 { 929 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 930 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 931 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 932 PetscErrorCode ierr; 933 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 934 int fd; 935 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 936 PetscInt nzmax,*column_indices,j,k,col,*garray = 
aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr =
PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1018 cnt = 0; 1019 for (i=0; i<mat->rmap->n; i++) { 1020 for (j=B->i[i]; j<B->i[i+1]; j++) { 1021 if ( garray[B->j[j]] > cstart) break; 1022 column_values[cnt++] = B->a[j]; 1023 } 1024 for (k=A->i[i]; k<A->i[i+1]; k++) { 1025 column_values[cnt++] = A->a[k]; 1026 } 1027 for (; j<B->i[i+1]; j++) { 1028 column_values[cnt++] = B->a[j]; 1029 } 1030 } 1031 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1032 1033 /* store the column values to the file */ 1034 if (!rank) { 1035 MPI_Status status; 1036 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1037 for (i=1; i<size; i++) { 1038 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1039 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1040 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1041 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1042 } 1043 } else { 1044 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1045 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1046 } 1047 ierr = PetscFree(column_values);CHKERRQ(ierr); 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1053 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1054 { 1055 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1056 PetscErrorCode ierr; 1057 PetscMPIInt rank = aij->rank,size = aij->size; 1058 PetscBool isdraw,iascii,isbinary; 1059 PetscViewer sviewer; 1060 PetscViewerFormat format; 1061 1062 PetscFunctionBegin; 1063 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1064 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1065 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1066 if (iascii) { 1067 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1068 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1069 MatInfo info; 1070 PetscBool inodes; 1071 1072 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1073 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1074 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1075 if (!inodes) { 1076 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1077 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1078 } else { 1079 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1080 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1081 } 1082 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1083 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1084 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1085 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1086 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1087 ierr 
= PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1088 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1089 PetscFunctionReturn(0); 1090 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1091 PetscInt inodecount,inodelimit,*inodes; 1092 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1093 if (inodes) { 1094 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1095 } else { 1096 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1097 } 1098 PetscFunctionReturn(0); 1099 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1100 PetscFunctionReturn(0); 1101 } 1102 } else if (isbinary) { 1103 if (size == 1) { 1104 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1105 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1106 } else { 1107 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1108 } 1109 PetscFunctionReturn(0); 1110 } else if (isdraw) { 1111 PetscDraw draw; 1112 PetscBool isnull; 1113 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1114 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1115 } 1116 1117 if (size == 1) { 1118 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1119 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1120 } else { 1121 /* assemble the entire matrix onto first processor. */ 1122 Mat A; 1123 Mat_SeqAIJ *Aloc; 1124 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1125 MatScalar *a; 1126 1127 if (mat->rmap->N > 1024) { 1128 PetscBool flg = PETSC_FALSE; 1129 1130 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1131 if (!flg) { 1132 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1133 } 1134 } 1135 1136 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1137 if (!rank) { 1138 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1139 } else { 1140 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1141 } 1142 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1143 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1144 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1145 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1146 1147 /* copy over the A part */ 1148 Aloc = (Mat_SeqAIJ*)aij->A->data; 1149 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1150 row = mat->rmap->rstart; 1151 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1152 for (i=0; i<m; i++) { 1153 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1154 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1155 } 1156 aj = Aloc->j; 1157 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1158 1159 /* copy over the B part */ 1160 Aloc = (Mat_SeqAIJ*)aij->B->data; 1161 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1162 row = mat->rmap->rstart; 1163 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1164 ct = cols; 1165 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1166 for (i=0; i<m; i++) { 1167 ierr = 
MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1168 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1169 } 1170 ierr = PetscFree(ct);CHKERRQ(ierr); 1171 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1172 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1173 /* 1174 Everyone has to call to draw the matrix since the graphics waits are 1175 synchronized across all processors that share the PetscDraw object 1176 */ 1177 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1178 if (!rank) { 1179 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1180 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1181 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1182 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1183 } 1184 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1185 ierr = MatDestroy(A);CHKERRQ(ierr); 1186 } 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "MatView_MPIAIJ" 1192 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1193 { 1194 PetscErrorCode ierr; 1195 PetscBool iascii,isdraw,issocket,isbinary; 1196 1197 PetscFunctionBegin; 1198 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1199 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1200 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1201 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1202 if (iascii || isdraw || isbinary || issocket) { 1203 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1204 } else { 1205 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1206 } 1207 PetscFunctionReturn(0); 1208 } 1209 1210 #undef __FUNCT__ 1211 #define __FUNCT__ "MatSOR_MPIAIJ" 1212 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1213 { 1214 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1215 PetscErrorCode ierr; 1216 Vec bb1 = 0; 1217 PetscBool hasop; 1218 1219 PetscFunctionBegin; 1220 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1221 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1222 } 1223 1224 if (flag == SOR_APPLY_UPPER) { 1225 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1230 if (flag & SOR_ZERO_INITIAL_GUESS) { 1231 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1232 its--; 1233 } 1234 1235 while (its--) { 1236 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1237 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1238 1239 /* update rhs: bb1 = bb - B*x */ 1240 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1241 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1242 1243 /* local sweep */ 1244 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1245 } 1246 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1247 if (flag & SOR_ZERO_INITIAL_GUESS) { 
1248 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1249 its--; 1250 } 1251 while (its--) { 1252 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1253 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1254 1255 /* update rhs: bb1 = bb - B*x */ 1256 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1257 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1258 1259 /* local sweep */ 1260 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1261 } 1262 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1263 if (flag & SOR_ZERO_INITIAL_GUESS) { 1264 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1265 its--; 1266 } 1267 while (its--) { 1268 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1270 1271 /* update rhs: bb1 = bb - B*x */ 1272 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1273 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1274 1275 /* local sweep */ 1276 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1277 } 1278 } else if (flag & SOR_EISENSTAT) { 1279 Vec xx1; 1280 1281 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1282 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1283 1284 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1285 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1286 if (!mat->diag) { 1287 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1288 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1289 } 1290 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1291 if (hasop) { 1292 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1293 } else { 1294 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1295 } 1296 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1297 1298 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1299 1300 /* local sweep */ 1301 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1302 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1303 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1304 } else { 1305 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1306 } 1307 1308 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1309 PetscFunctionReturn(0); 1310 } 1311 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatPermute_MPIAIJ" 1314 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1315 { 1316 MPI_Comm comm,pcomm; 1317 PetscInt first,local_size,nrows; 1318 const PetscInt *rows; 1319 PetscMPIInt size; 1320 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1321 PetscErrorCode ierr; 1322 1323 PetscFunctionBegin; 1324 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1325 /* make a collective version of 'rowp' */ 1326 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1327 if (pcomm==comm) { 1328 crowp = rowp; 1329 } else { 1330 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1331 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1332 ierr = 
ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1333 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1334 } 1335 /* collect the global row permutation and invert it */ 1336 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1337 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1338 if (pcomm!=comm) { 1339 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1340 } 1341 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1342 /* get the local target indices */ 1343 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1344 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1345 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1346 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1347 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1348 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1349 /* the column permutation is so much easier; 1350 make a local version of 'colp' and invert it */ 1351 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1352 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1353 if (size==1) { 1354 lcolp = colp; 1355 } else { 1356 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1357 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1358 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1359 } 1360 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1361 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1362 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1363 if (size>1) { 1364 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1365 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1366 } 1367 /* now we just get the submatrix */ 1368 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1369 /* clean up */ 1370 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1371 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1377 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1378 { 1379 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1380 Mat A = mat->A,B = mat->B; 1381 PetscErrorCode ierr; 1382 PetscReal isend[5],irecv[5]; 1383 1384 PetscFunctionBegin; 1385 info->block_size = 1.0; 1386 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1387 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1388 isend[3] = info->memory; isend[4] = info->mallocs; 1389 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1390 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1391 isend[3] += info->memory; isend[4] += info->mallocs; 1392 if (flag == MAT_LOCAL) { 1393 info->nz_used = isend[0]; 1394 info->nz_allocated = isend[1]; 1395 info->nz_unneeded = isend[2]; 1396 info->memory = isend[3]; 1397 info->mallocs = isend[4]; 1398 } else if (flag == MAT_GLOBAL_MAX) { 1399 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1400 info->nz_used = irecv[0]; 1401 info->nz_allocated = irecv[1]; 1402 info->nz_unneeded = irecv[2]; 1403 info->memory = irecv[3]; 1404 info->mallocs = irecv[4]; 1405 } else if (flag == MAT_GLOBAL_SUM) { 1406 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1407 info->nz_used = irecv[0]; 1408 info->nz_allocated = irecv[1]; 1409 info->nz_unneeded = irecv[2]; 1410 info->memory = irecv[3]; 1411 info->mallocs = irecv[4]; 1412 } 1413 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky 
*/ 1414 info->fill_ratio_needed = 0; 1415 info->factor_mallocs = 0; 1416 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatSetOption_MPIAIJ" 1422 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1423 { 1424 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1425 PetscErrorCode ierr; 1426 1427 PetscFunctionBegin; 1428 switch (op) { 1429 case MAT_NEW_NONZERO_LOCATIONS: 1430 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1431 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1432 case MAT_KEEP_NONZERO_PATTERN: 1433 case MAT_NEW_NONZERO_LOCATION_ERR: 1434 case MAT_USE_INODES: 1435 case MAT_IGNORE_ZERO_ENTRIES: 1436 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1437 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1438 break; 1439 case MAT_ROW_ORIENTED: 1440 a->roworiented = flg; 1441 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1442 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1443 break; 1444 case MAT_NEW_DIAGONALS: 1445 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1446 break; 1447 case MAT_IGNORE_OFF_PROC_ENTRIES: 1448 a->donotstash = PETSC_TRUE; 1449 break; 1450 case MAT_SPD: 1451 A->spd_set = PETSC_TRUE; 1452 A->spd = flg; 1453 if (flg) { 1454 A->symmetric = PETSC_TRUE; 1455 A->structurally_symmetric = PETSC_TRUE; 1456 A->symmetric_set = PETSC_TRUE; 1457 A->structurally_symmetric_set = PETSC_TRUE; 1458 } 1459 break; 1460 case MAT_SYMMETRIC: 1461 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1462 break; 1463 case MAT_STRUCTURALLY_SYMMETRIC: 1464 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1465 break; 1466 case MAT_HERMITIAN: 1467 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1468 break; 1469 case MAT_SYMMETRY_ETERNAL: 1470 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1471 break; 1472 default: 1473 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1474 } 1475 PetscFunctionReturn(0); 1476 } 1477 1478 #undef __FUNCT__ 1479 #define __FUNCT__ "MatGetRow_MPIAIJ" 1480 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1481 { 1482 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1483 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1484 PetscErrorCode ierr; 1485 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1486 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1487 PetscInt *cmap,*idx_p; 1488 1489 PetscFunctionBegin; 1490 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1491 mat->getrowactive = PETSC_TRUE; 1492 1493 if (!mat->rowvalues && (idx || v)) { 1494 /* 1495 allocate enough space to hold information from the longest row. 
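A call to MatGetRow() merges the diagonal (A) and off-diagonal (B) blocks into one row, so the scratch arrays must hold nzA + nzB entries for the widest local row; they are allocated once here and reused by subsequent MatGetRow() calls.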
1496 */ 1497 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1498 PetscInt max = 1,tmp; 1499 for (i=0; i<matin->rmap->n; i++) { 1500 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1501 if (max < tmp) { max = tmp; } 1502 } 1503 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1504 } 1505 1506 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1507 lrow = row - rstart; 1508 1509 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1510 if (!v) {pvA = 0; pvB = 0;} 1511 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1512 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1513 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1514 nztot = nzA + nzB; 1515 1516 cmap = mat->garray; 1517 if (v || idx) { 1518 if (nztot) { 1519 /* Sort by increasing column numbers, assuming A and B already sorted */ 1520 PetscInt imark = -1; 1521 if (v) { 1522 *v = v_p = mat->rowvalues; 1523 for (i=0; i<nzB; i++) { 1524 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1525 else break; 1526 } 1527 imark = i; 1528 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1529 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1530 } 1531 if (idx) { 1532 *idx = idx_p = mat->rowindices; 1533 if (imark > -1) { 1534 for (i=0; i<imark; i++) { 1535 idx_p[i] = cmap[cworkB[i]]; 1536 } 1537 } else { 1538 for (i=0; i<nzB; i++) { 1539 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1540 else break; 1541 } 1542 imark = i; 1543 } 1544 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1545 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1546 } 1547 } else { 1548 if (idx) *idx = 0; 1549 if (v) *v = 0; 1550 } 1551 } 1552 *nz = nztot; 1553 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1554 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1555 PetscFunctionReturn(0); 1556 } 1557 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1560 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1561 { 1562 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1563 1564 PetscFunctionBegin; 1565 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1566 aij->getrowactive = PETSC_FALSE; 1567 PetscFunctionReturn(0); 1568 } 1569 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatNorm_MPIAIJ" 1572 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1573 { 1574 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1575 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1576 PetscErrorCode ierr; 1577 PetscInt i,j,cstart = mat->cmap->rstart; 1578 PetscReal sum = 0.0; 1579 MatScalar *v; 1580 1581 PetscFunctionBegin; 1582 if (aij->size == 1) { 1583 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1584 } else { 1585 if (type == NORM_FROBENIUS) { 1586 v = amat->a; 1587 for (i=0; i<amat->nz; i++) { 1588 #if defined(PETSC_USE_COMPLEX) 1589 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1590 #else 1591 sum += (*v)*(*v); v++; 1592 #endif 1593 } 1594 v = bmat->a; 1595 for (i=0; i<bmat->nz; i++) { 1596 #if defined(PETSC_USE_COMPLEX) 1597 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1598 #else 1599 sum += (*v)*(*v); v++; 1600 #endif 1601 } 1602 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1603 *norm = sqrt(*norm); 1604 } else if (type 
== NORM_1) { /* max column norm */ 1605 PetscReal *tmp,*tmp2; 1606 PetscInt *jj,*garray = aij->garray; 1607 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1608 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1609 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1610 *norm = 0.0; 1611 v = amat->a; jj = amat->j; 1612 for (j=0; j<amat->nz; j++) { 1613 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1614 } 1615 v = bmat->a; jj = bmat->j; 1616 for (j=0; j<bmat->nz; j++) { 1617 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1618 } 1619 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1620 for (j=0; j<mat->cmap->N; j++) { 1621 if (tmp2[j] > *norm) *norm = tmp2[j]; 1622 } 1623 ierr = PetscFree(tmp);CHKERRQ(ierr); 1624 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1625 } else if (type == NORM_INFINITY) { /* max row norm */ 1626 PetscReal ntemp = 0.0; 1627 for (j=0; j<aij->A->rmap->n; j++) { 1628 v = amat->a + amat->i[j]; 1629 sum = 0.0; 1630 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1631 sum += PetscAbsScalar(*v); v++; 1632 } 1633 v = bmat->a + bmat->i[j]; 1634 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1635 sum += PetscAbsScalar(*v); v++; 1636 } 1637 if (sum > ntemp) ntemp = sum; 1638 } 1639 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1640 } else { 1641 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1642 } 1643 } 1644 PetscFunctionReturn(0); 1645 } 1646 1647 #undef __FUNCT__ 1648 #define __FUNCT__ "MatTranspose_MPIAIJ" 1649 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1650 { 1651 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1652 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1653 PetscErrorCode ierr; 1654 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1655 PetscInt cstart=A->cmap->rstart,ncol; 1656 Mat B; 1657 MatScalar *array; 1658 1659 PetscFunctionBegin; 1660 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1661 1662 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1663 ai = Aloc->i; aj = Aloc->j; 1664 bi = Bloc->i; bj = Bloc->j; 1665 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1666 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1667 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1668 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1669 for (i=0; i<ai[ma]; i++){ 1670 d_nnz[aj[i]] ++; 1671 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1672 } 1673 1674 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1675 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1676 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1677 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1678 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1679 } else { 1680 B = *matout; 1681 } 1682 1683 /* copy over the A part */ 1684 array = Aloc->a; 1685 row = A->rmap->rstart; 1686 for (i=0; i<ma; i++) { 1687 ncol = ai[i+1]-ai[i]; 1688 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1689 row++; array += ncol; aj += ncol; 1690 } 1691 aj = Aloc->j; 1692 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1693 1694 /* copy over the B part */ 1695 ierr = 
PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1696 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1697 array = Bloc->a; 1698 row = A->rmap->rstart; 1699 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1700 cols_tmp = cols; 1701 for (i=0; i<mb; i++) { 1702 ncol = bi[i+1]-bi[i]; 1703 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1704 row++; array += ncol; cols_tmp += ncol; 1705 } 1706 ierr = PetscFree(cols);CHKERRQ(ierr); 1707 1708 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1709 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1710 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1711 *matout = B; 1712 } else { 1713 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1714 } 1715 PetscFunctionReturn(0); 1716 } 1717 1718 #undef __FUNCT__ 1719 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1720 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1721 { 1722 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1723 Mat a = aij->A,b = aij->B; 1724 PetscErrorCode ierr; 1725 PetscInt s1,s2,s3; 1726 1727 PetscFunctionBegin; 1728 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1729 if (rr) { 1730 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1731 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1732 /* Overlap communication with computation. */ 1733 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1734 } 1735 if (ll) { 1736 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1737 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1738 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1739 } 1740 /* scale the diagonal block */ 1741 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1742 1743 if (rr) { 1744 /* Do a scatter end and then right scale the off-diagonal block */ 1745 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1746 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1747 } 1748 1749 PetscFunctionReturn(0); 1750 } 1751 1752 #undef __FUNCT__ 1753 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1754 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1755 { 1756 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1757 PetscErrorCode ierr; 1758 1759 PetscFunctionBegin; 1760 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1761 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1762 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1763 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1764 PetscFunctionReturn(0); 1765 } 1766 #undef __FUNCT__ 1767 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1768 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1769 { 1770 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1771 PetscErrorCode ierr; 1772 1773 PetscFunctionBegin; 1774 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1775 PetscFunctionReturn(0); 1776 } 1777 1778 #undef __FUNCT__ 1779 #define __FUNCT__ "MatEqual_MPIAIJ" 1780 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 1781 { 1782 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1783 Mat a,b,c,d; 1784 PetscBool flg; 1785 PetscErrorCode ierr; 1786 1787 PetscFunctionBegin; 1788 a = matA->A; b = matA->B; 1789 c = matB->A; d = matB->B; 1790 1791 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1792 if (flg) { 1793 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1794 } 1795 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1796 
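/* flg compares only the local diagonal and off-diagonal blocks; the MPI_LAND reduction above makes the result collective, so every process returns the same answer in flag */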
PetscFunctionReturn(0); 1797 } 1798 1799 #undef __FUNCT__ 1800 #define __FUNCT__ "MatCopy_MPIAIJ" 1801 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1802 { 1803 PetscErrorCode ierr; 1804 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1805 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1806 1807 PetscFunctionBegin; 1808 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1809 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1810 /* because of the column compression in the off-processor part of the matrix a->B, 1811 the number of columns in a->B and b->B may be different, hence we cannot call 1812 the MatCopy() directly on the two parts. If need be, we can provide a more 1813 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1814 then copying the submatrices */ 1815 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1816 } else { 1817 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1818 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1819 } 1820 PetscFunctionReturn(0); 1821 } 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1825 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1826 { 1827 PetscErrorCode ierr; 1828 1829 PetscFunctionBegin; 1830 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1831 PetscFunctionReturn(0); 1832 } 1833 1834 #undef __FUNCT__ 1835 #define __FUNCT__ "MatAXPY_MPIAIJ" 1836 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1837 { 1838 PetscErrorCode ierr; 1839 PetscInt i; 1840 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1841 PetscBLASInt bnz,one=1; 1842 Mat_SeqAIJ *x,*y; 1843 1844 PetscFunctionBegin; 1845 if (str == SAME_NONZERO_PATTERN) { 1846 PetscScalar alpha = a; 1847 x = (Mat_SeqAIJ *)xx->A->data; 1848 y = (Mat_SeqAIJ *)yy->A->data; 1849 bnz = PetscBLASIntCast(x->nz); 1850 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1851 x = (Mat_SeqAIJ *)xx->B->data; 1852 y = (Mat_SeqAIJ *)yy->B->data; 1853 bnz = PetscBLASIntCast(x->nz); 1854 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1855 } else if (str == SUBSET_NONZERO_PATTERN) { 1856 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1857 1858 x = (Mat_SeqAIJ *)xx->B->data; 1859 y = (Mat_SeqAIJ *)yy->B->data; 1860 if (y->xtoy && y->XtoY != xx->B) { 1861 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1862 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1863 } 1864 if (!y->xtoy) { /* get xtoy */ 1865 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1866 y->XtoY = xx->B; 1867 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1868 } 1869 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1870 } else { 1871 Mat B; 1872 PetscInt *nnz_d,*nnz_o; 1873 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 1874 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 1875 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 1876 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1877 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1878 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 1879 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1880 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 1881 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 1882 ierr = 
MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1883 ierr = MatHeaderReplace(Y,B); 1884 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1885 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1886 } 1887 PetscFunctionReturn(0); 1888 } 1889 1890 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "MatConjugate_MPIAIJ" 1894 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1895 { 1896 #if defined(PETSC_USE_COMPLEX) 1897 PetscErrorCode ierr; 1898 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1899 1900 PetscFunctionBegin; 1901 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1902 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1903 #else 1904 PetscFunctionBegin; 1905 #endif 1906 PetscFunctionReturn(0); 1907 } 1908 1909 #undef __FUNCT__ 1910 #define __FUNCT__ "MatRealPart_MPIAIJ" 1911 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1912 { 1913 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1914 PetscErrorCode ierr; 1915 1916 PetscFunctionBegin; 1917 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1918 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1924 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1925 { 1926 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1927 PetscErrorCode ierr; 1928 1929 PetscFunctionBegin; 1930 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1931 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #ifdef PETSC_HAVE_PBGL 1936 1937 #include <boost/parallel/mpi/bsp_process_group.hpp> 1938 #include <boost/graph/distributed/ilu_default_graph.hpp> 1939 #include <boost/graph/distributed/ilu_0_block.hpp> 1940 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1941 #include <boost/graph/distributed/petsc/interface.hpp> 1942 #include <boost/multi_array.hpp> 1943 #include <boost/parallel/distributed_property_map->hpp> 1944 1945 #undef __FUNCT__ 1946 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1947 /* 1948 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1949 */ 1950 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1951 { 1952 namespace petsc = boost::distributed::petsc; 1953 1954 namespace graph_dist = boost::graph::distributed; 1955 using boost::graph::distributed::ilu_default::process_group_type; 1956 using boost::graph::ilu_permuted; 1957 1958 PetscBool row_identity, col_identity; 1959 PetscContainer c; 1960 PetscInt m, n, M, N; 1961 PetscErrorCode ierr; 1962 1963 PetscFunctionBegin; 1964 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1965 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1966 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1967 if (!row_identity || !col_identity) { 1968 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1969 } 1970 1971 process_group_type pg; 1972 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1973 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1974 lgraph_type& level_graph = *lgraph_p; 1975 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1976 1977 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1978 ilu_permuted(level_graph); 1979 1980 /* put together the new matrix */ 1981 ierr = MatCreate(((PetscObject)A)->comm, 
fact);CHKERRQ(ierr); 1982 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1983 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1984 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1985 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1986 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1987 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1988 1989 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1990 ierr = PetscContainerSetPointer(c, lgraph_p); 1991 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1992 PetscFunctionReturn(0); 1993 } 1994 1995 #undef __FUNCT__ 1996 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1997 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1998 { 1999 PetscFunctionBegin; 2000 PetscFunctionReturn(0); 2001 } 2002 2003 #undef __FUNCT__ 2004 #define __FUNCT__ "MatSolve_MPIAIJ" 2005 /* 2006 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2007 */ 2008 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2009 { 2010 namespace graph_dist = boost::graph::distributed; 2011 2012 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2013 lgraph_type* lgraph_p; 2014 PetscContainer c; 2015 PetscErrorCode ierr; 2016 2017 PetscFunctionBegin; 2018 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2019 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2020 ierr = VecCopy(b, x);CHKERRQ(ierr); 2021 2022 PetscScalar* array_x; 2023 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2024 PetscInt sx; 2025 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2026 2027 PetscScalar* array_b; 2028 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2029 PetscInt sb; 2030 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2031 2032 lgraph_type& level_graph = *lgraph_p; 2033 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2034 2035 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2036 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2037 ref_x(array_x, boost::extents[num_vertices(graph)]); 2038 2039 typedef boost::iterator_property_map<array_ref_type::iterator, 2040 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2041 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2042 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2043 2044 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2045 2046 PetscFunctionReturn(0); 2047 } 2048 #endif 2049 2050 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2051 PetscInt nzlocal,nsends,nrecvs; 2052 PetscMPIInt *send_rank,*recv_rank; 2053 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2054 PetscScalar *sbuf_a,**rbuf_a; 2055 PetscErrorCode (*MatDestroy)(Mat); 2056 } Mat_Redundant; 2057 2058 #undef __FUNCT__ 2059 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2060 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2061 { 2062 PetscErrorCode ierr; 2063 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2064 PetscInt i; 2065 2066 PetscFunctionBegin; 2067 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2068 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2069 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2070 for (i=0; i<redund->nrecvs; i++){ 2071 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2072 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2073 } 2074 ierr = 
PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2075 ierr = PetscFree(redund);CHKERRQ(ierr); 2076 PetscFunctionReturn(0); 2077 } 2078 2079 #undef __FUNCT__ 2080 #define __FUNCT__ "MatDestroy_MatRedundant" 2081 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2082 { 2083 PetscErrorCode ierr; 2084 PetscContainer container; 2085 Mat_Redundant *redund=PETSC_NULL; 2086 2087 PetscFunctionBegin; 2088 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2089 if (container) { 2090 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2091 } else { 2092 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2093 } 2094 A->ops->destroy = redund->MatDestroy; 2095 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2096 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2097 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2098 PetscFunctionReturn(0); 2099 } 2100 2101 #undef __FUNCT__ 2102 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2103 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2104 { 2105 PetscMPIInt rank,size; 2106 MPI_Comm comm=((PetscObject)mat)->comm; 2107 PetscErrorCode ierr; 2108 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2109 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2110 PetscInt *rowrange=mat->rmap->range; 2111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2112 Mat A=aij->A,B=aij->B,C=*matredundant; 2113 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2114 PetscScalar *sbuf_a; 2115 PetscInt nzlocal=a->nz+b->nz; 2116 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2117 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2118 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2119 MatScalar *aworkA,*aworkB; 2120 PetscScalar *vals; 2121 PetscMPIInt tag1,tag2,tag3,imdex; 2122 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2123 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2124 MPI_Status recv_status,*send_status; 2125 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2126 PetscInt **rbuf_j=PETSC_NULL; 2127 PetscScalar **rbuf_a=PETSC_NULL; 2128 Mat_Redundant *redund=PETSC_NULL; 2129 PetscContainer container; 2130 2131 PetscFunctionBegin; 2132 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2133 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2134 2135 if (reuse == MAT_REUSE_MATRIX) { 2136 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2137 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2138 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2139 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2140 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2141 if (container) { 2142 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2143 } else { 2144 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2145 } 2146 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2147 2148 nsends = redund->nsends; 2149 nrecvs = redund->nrecvs; 2150 send_rank = redund->send_rank; 2151 recv_rank = redund->recv_rank; 2152 sbuf_nz = redund->sbuf_nz; 2153 rbuf_nz = redund->rbuf_nz; 2154 sbuf_j = redund->sbuf_j; 2155 sbuf_a = redund->sbuf_a; 2156 rbuf_j = redund->rbuf_j; 2157 rbuf_a = redund->rbuf_a; 2158 } 2159 2160 if (reuse == MAT_INITIAL_MATRIX){ 2161 PetscMPIInt subrank,subsize; 2162 PetscInt nleftover,np_subcomm; 2163 /* get the destination processors' id send_rank, nsends and nrecvs */ 2164 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2165 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2166 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2167 np_subcomm = size/nsubcomm; 2168 nleftover = size - nsubcomm*np_subcomm; 2169 nsends = 0; nrecvs = 0; 2170 for (i=0; i<size; i++){ /* i=rank*/ 2171 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2172 send_rank[nsends] = i; nsends++; 2173 recv_rank[nrecvs++] = i; 2174 } 2175 } 2176 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2177 i = size-nleftover-1; 2178 j = 0; 2179 while (j < nsubcomm - nleftover){ 2180 send_rank[nsends++] = i; 2181 i--; j++; 2182 } 2183 } 2184 2185 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2186 for (i=0; i<nleftover; i++){ 2187 recv_rank[nrecvs++] = size-nleftover+i; 2188 } 2189 } 2190 2191 /* allocate sbuf_j, sbuf_a */ 2192 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2193 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2194 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2195 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2196 2197 /* copy mat's local entries into the buffers */ 2198 if (reuse == MAT_INITIAL_MATRIX){ 2199 rownz_max = 0; 2200 rptr = sbuf_j; 2201 cols = sbuf_j + rend-rstart + 1; 2202 vals = sbuf_a; 2203 rptr[0] = 0; 2204 for (i=0; i<rend-rstart; i++){ 2205 row = i + rstart; 2206 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2207 ncols = nzA + nzB; 2208 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2209 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2210 /* load the column indices for this row into cols */ 2211 lwrite = 0; 2212 for (l=0; l<nzB; l++) { 2213 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2214 vals[lwrite] = aworkB[l]; 2215 cols[lwrite++] = ctmp; 2216 } 2217 } 2218 for (l=0; l<nzA; l++){ 2219 vals[lwrite] = aworkA[l]; 2220 cols[lwrite++] = cstart + cworkA[l]; 2221 } 2222 for (l=0; l<nzB; l++) { 2223 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2224 vals[lwrite] = aworkB[l]; 2225 cols[lwrite++] = ctmp; 2226 } 2227 } 2228 vals += ncols; 2229 cols += ncols; 2230 rptr[i+1] = rptr[i] + ncols; 2231 if (rownz_max < ncols) rownz_max = ncols; 2232 } 2233 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2234 } else { /* only copy matrix values into sbuf_a */ 2235 rptr = sbuf_j; 2236 vals = sbuf_a; 2237 rptr[0] = 0; 2238 for (i=0; i<rend-rstart; i++){ 2239 row = i + rstart; 2240 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2241 ncols = nzA + nzB; 2242 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2243 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2244 lwrite = 0; 2245 for (l=0; l<nzB; l++) { 2246 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2247 } 2248 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2249 for 
(l=0; l<nzB; l++) { 2250 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2251 } 2252 vals += ncols; 2253 rptr[i+1] = rptr[i] + ncols; 2254 } 2255 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2256 2257 /* send nzlocal to others, and recv other's nzlocal */ 2258 /*--------------------------------------------------*/ 2259 if (reuse == MAT_INITIAL_MATRIX){ 2260 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2261 s_waits2 = s_waits3 + nsends; 2262 s_waits1 = s_waits2 + nsends; 2263 r_waits1 = s_waits1 + nsends; 2264 r_waits2 = r_waits1 + nrecvs; 2265 r_waits3 = r_waits2 + nrecvs; 2266 } else { 2267 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2268 r_waits3 = s_waits3 + nsends; 2269 } 2270 2271 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2272 if (reuse == MAT_INITIAL_MATRIX){ 2273 /* get new tags to keep the communication clean */ 2274 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2275 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2276 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2277 2278 /* post receives of other's nzlocal */ 2279 for (i=0; i<nrecvs; i++){ 2280 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2281 } 2282 /* send nzlocal to others */ 2283 for (i=0; i<nsends; i++){ 2284 sbuf_nz[i] = nzlocal; 2285 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2286 } 2287 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2288 count = nrecvs; 2289 while (count) { 2290 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2291 recv_rank[imdex] = recv_status.MPI_SOURCE; 2292 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2293 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2294 2295 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2296 rbuf_nz[imdex] += i + 2; 2297 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2298 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2299 count--; 2300 } 2301 /* wait on sends of nzlocal */ 2302 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2303 /* send mat->i,j to others, and recv from other's */ 2304 /*------------------------------------------------*/ 2305 for (i=0; i<nsends; i++){ 2306 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2307 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2308 } 2309 /* wait on receives of mat->i,j */ 2310 /*------------------------------*/ 2311 count = nrecvs; 2312 while (count) { 2313 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2314 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2315 count--; 2316 } 2317 /* wait on sends of mat->i,j */ 2318 /*---------------------------*/ 2319 if (nsends) { 2320 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2321 } 2322 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2323 2324 /* post receives, send and receive mat->a */ 2325 
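/* values only: on a MAT_REUSE_MATRIX call the row pointers and column indices exchanged above are still valid, so each call ships just the nzlocal scalars packed in sbuf_a */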
/*----------------------------------------*/ 2326 for (imdex=0; imdex<nrecvs; imdex++) { 2327 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2328 } 2329 for (i=0; i<nsends; i++){ 2330 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2331 } 2332 count = nrecvs; 2333 while (count) { 2334 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2335 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2336 count--; 2337 } 2338 if (nsends) { 2339 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2340 } 2341 2342 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2343 2344 /* create redundant matrix */ 2345 /*-------------------------*/ 2346 if (reuse == MAT_INITIAL_MATRIX){ 2347 /* compute rownz_max for preallocation */ 2348 for (imdex=0; imdex<nrecvs; imdex++){ 2349 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2350 rptr = rbuf_j[imdex]; 2351 for (i=0; i<j; i++){ 2352 ncols = rptr[i+1] - rptr[i]; 2353 if (rownz_max < ncols) rownz_max = ncols; 2354 } 2355 } 2356 2357 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2358 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2359 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2360 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2361 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2362 } else { 2363 C = *matredundant; 2364 } 2365 2366 /* insert local matrix entries */ 2367 rptr = sbuf_j; 2368 cols = sbuf_j + rend-rstart + 1; 2369 vals = sbuf_a; 2370 for (i=0; i<rend-rstart; i++){ 2371 row = i + rstart; 2372 ncols = rptr[i+1] - rptr[i]; 2373 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2374 vals += ncols; 2375 cols += ncols; 2376 } 2377 /* insert received matrix entries */ 2378 for (imdex=0; imdex<nrecvs; imdex++){ 2379 rstart = rowrange[recv_rank[imdex]]; 2380 rend = rowrange[recv_rank[imdex]+1]; 2381 rptr = rbuf_j[imdex]; 2382 cols = rbuf_j[imdex] + rend-rstart + 1; 2383 vals = rbuf_a[imdex]; 2384 for (i=0; i<rend-rstart; i++){ 2385 row = i + rstart; 2386 ncols = rptr[i+1] - rptr[i]; 2387 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2388 vals += ncols; 2389 cols += ncols; 2390 } 2391 } 2392 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2393 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2394 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2395 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2396 if (reuse == MAT_INITIAL_MATRIX){ 2397 PetscContainer container; 2398 *matredundant = C; 2399 /* create a supporting struct and attach it to C for reuse */ 2400 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2401 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2402 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2403 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2404 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2405 2406 redund->nzlocal = nzlocal; 2407 redund->nsends = nsends; 2408 redund->nrecvs = nrecvs; 2409 redund->send_rank = send_rank; 2410 redund->recv_rank = recv_rank; 
2411 redund->sbuf_nz = sbuf_nz; 2412 redund->rbuf_nz = rbuf_nz; 2413 redund->sbuf_j = sbuf_j; 2414 redund->sbuf_a = sbuf_a; 2415 redund->rbuf_j = rbuf_j; 2416 redund->rbuf_a = rbuf_a; 2417 2418 redund->MatDestroy = C->ops->destroy; 2419 C->ops->destroy = MatDestroy_MatRedundant; 2420 } 2421 PetscFunctionReturn(0); 2422 } 2423 2424 #undef __FUNCT__ 2425 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2426 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2427 { 2428 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2429 PetscErrorCode ierr; 2430 PetscInt i,*idxb = 0; 2431 PetscScalar *va,*vb; 2432 Vec vtmp; 2433 2434 PetscFunctionBegin; 2435 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2436 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2437 if (idx) { 2438 for (i=0; i<A->rmap->n; i++) { 2439 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2440 } 2441 } 2442 2443 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2444 if (idx) { 2445 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2446 } 2447 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2448 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2449 2450 for (i=0; i<A->rmap->n; i++){ 2451 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2452 va[i] = vb[i]; 2453 if (idx) idx[i] = a->garray[idxb[i]]; 2454 } 2455 } 2456 2457 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2458 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2459 if (idxb) { 2460 ierr = PetscFree(idxb);CHKERRQ(ierr); 2461 } 2462 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2463 PetscFunctionReturn(0); 2464 } 2465 2466 #undef __FUNCT__ 2467 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2468 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2469 { 2470 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2471 PetscErrorCode ierr; 2472 PetscInt i,*idxb = 0; 2473 PetscScalar *va,*vb; 2474 Vec vtmp; 2475 2476 PetscFunctionBegin; 2477 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2478 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2479 if (idx) { 2480 for (i=0; i<A->rmap->n; i++) { 2481 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2482 } 2483 } 2484 2485 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2486 if (idx) { 2487 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2488 } 2489 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2490 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2491 2492 for (i=0; i<A->rmap->n; i++){ 2493 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2494 va[i] = vb[i]; 2495 if (idx) idx[i] = a->garray[idxb[i]]; 2496 } 2497 } 2498 2499 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2500 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2501 if (idxb) { 2502 ierr = PetscFree(idxb);CHKERRQ(ierr); 2503 } 2504 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2505 PetscFunctionReturn(0); 2506 } 2507 2508 #undef __FUNCT__ 2509 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2510 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2511 { 2512 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2513 PetscInt n = A->rmap->n; 2514 PetscInt cstart = A->cmap->rstart; 2515 PetscInt *cmap = mat->garray; 2516 PetscInt *diagIdx, *offdiagIdx; 2517 Vec diagV, offdiagV; 2518 PetscScalar *a, *diagA, *offdiagA; 2519 PetscInt r; 2520 PetscErrorCode ierr; 2521 2522 PetscFunctionBegin; 2523 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2524 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2525 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2526 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2527 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2528 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2529 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2530 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2531 for(r = 0; r < n; ++r) { 2532 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2533 a[r] = diagA[r]; 2534 idx[r] = cstart + diagIdx[r]; 2535 } else { 2536 a[r] = offdiagA[r]; 2537 idx[r] = cmap[offdiagIdx[r]]; 2538 } 2539 } 2540 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2541 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2542 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2543 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2544 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2545 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 #undef __FUNCT__ 2550 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2551 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2552 { 2553 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2554 PetscInt n = A->rmap->n; 2555 PetscInt cstart = A->cmap->rstart; 2556 PetscInt *cmap = mat->garray; 2557 PetscInt *diagIdx, *offdiagIdx; 2558 Vec diagV, offdiagV; 2559 PetscScalar *a, *diagA, *offdiagA; 2560 PetscInt r; 2561 PetscErrorCode ierr; 2562 2563 PetscFunctionBegin; 2564 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2565 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2566 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2567 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2568 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2569 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2570 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2571 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2572 for(r = 0; r < n; ++r) { 2573 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2574 a[r] = diagA[r]; 2575 idx[r] = cstart + diagIdx[r]; 2576 } else { 2577 a[r] = offdiagA[r]; 2578 idx[r] = cmap[offdiagIdx[r]]; 2579 } 2580 } 2581 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2582 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2583 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2584 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2585 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2586 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2587 PetscFunctionReturn(0); 2588 } 2589 2590 #undef __FUNCT__ 2591 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2592 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2593 { 2594 PetscErrorCode ierr; 2595 Mat *dummy; 2596 2597 PetscFunctionBegin; 2598 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2599 *newmat = *dummy; 2600 ierr = PetscFree(dummy);CHKERRQ(ierr); 2601 PetscFunctionReturn(0); 2602 } 2603 2604 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2605 /* -------------------------------------------------------------------*/ 2606 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2607 MatGetRow_MPIAIJ, 2608 MatRestoreRow_MPIAIJ, 2609 MatMult_MPIAIJ, 2610 /* 4*/ MatMultAdd_MPIAIJ, 2611 MatMultTranspose_MPIAIJ, 2612 MatMultTransposeAdd_MPIAIJ, 2613 #ifdef PETSC_HAVE_PBGL 2614 MatSolve_MPIAIJ, 2615 #else 2616 0, 2617 #endif 2618 0, 2619 0, 2620 /*10*/ 0, 2621 0, 2622 0, 2623 
MatSOR_MPIAIJ, 2624 MatTranspose_MPIAIJ, 2625 /*15*/ MatGetInfo_MPIAIJ, 2626 MatEqual_MPIAIJ, 2627 MatGetDiagonal_MPIAIJ, 2628 MatDiagonalScale_MPIAIJ, 2629 MatNorm_MPIAIJ, 2630 /*20*/ MatAssemblyBegin_MPIAIJ, 2631 MatAssemblyEnd_MPIAIJ, 2632 MatSetOption_MPIAIJ, 2633 MatZeroEntries_MPIAIJ, 2634 /*24*/ MatZeroRows_MPIAIJ, 2635 0, 2636 #ifdef PETSC_HAVE_PBGL 2637 0, 2638 #else 2639 0, 2640 #endif 2641 0, 2642 0, 2643 /*29*/ MatSetUpPreallocation_MPIAIJ, 2644 #ifdef PETSC_HAVE_PBGL 2645 0, 2646 #else 2647 0, 2648 #endif 2649 0, 2650 0, 2651 0, 2652 /*34*/ MatDuplicate_MPIAIJ, 2653 0, 2654 0, 2655 0, 2656 0, 2657 /*39*/ MatAXPY_MPIAIJ, 2658 MatGetSubMatrices_MPIAIJ, 2659 MatIncreaseOverlap_MPIAIJ, 2660 MatGetValues_MPIAIJ, 2661 MatCopy_MPIAIJ, 2662 /*44*/ MatGetRowMax_MPIAIJ, 2663 MatScale_MPIAIJ, 2664 0, 2665 0, 2666 0, 2667 /*49*/ MatSetBlockSize_MPIAIJ, 2668 0, 2669 0, 2670 0, 2671 0, 2672 /*54*/ MatFDColoringCreate_MPIAIJ, 2673 0, 2674 MatSetUnfactored_MPIAIJ, 2675 MatPermute_MPIAIJ, 2676 0, 2677 /*59*/ MatGetSubMatrix_MPIAIJ, 2678 MatDestroy_MPIAIJ, 2679 MatView_MPIAIJ, 2680 0, 2681 0, 2682 /*64*/ 0, 2683 0, 2684 0, 2685 0, 2686 0, 2687 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2688 MatGetRowMinAbs_MPIAIJ, 2689 0, 2690 MatSetColoring_MPIAIJ, 2691 #if defined(PETSC_HAVE_ADIC) 2692 MatSetValuesAdic_MPIAIJ, 2693 #else 2694 0, 2695 #endif 2696 MatSetValuesAdifor_MPIAIJ, 2697 /*75*/ MatFDColoringApply_AIJ, 2698 0, 2699 0, 2700 0, 2701 0, 2702 /*80*/ 0, 2703 0, 2704 0, 2705 /*83*/ MatLoad_MPIAIJ, 2706 0, 2707 0, 2708 0, 2709 0, 2710 0, 2711 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2712 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2713 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2714 MatPtAP_Basic, 2715 MatPtAPSymbolic_MPIAIJ, 2716 /*94*/ MatPtAPNumeric_MPIAIJ, 2717 0, 2718 0, 2719 0, 2720 0, 2721 /*99*/ 0, 2722 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2723 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2724 MatConjugate_MPIAIJ, 2725 0, 2726 /*104*/MatSetValuesRow_MPIAIJ, 2727 MatRealPart_MPIAIJ, 2728 MatImaginaryPart_MPIAIJ, 2729 0, 2730 0, 2731 /*109*/0, 2732 MatGetRedundantMatrix_MPIAIJ, 2733 MatGetRowMin_MPIAIJ, 2734 0, 2735 0, 2736 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2737 0, 2738 0, 2739 0, 2740 0, 2741 /*119*/0, 2742 0, 2743 0, 2744 0, 2745 MatGetMultiProcBlock_MPIAIJ 2746 }; 2747 2748 /* ----------------------------------------------------------------------------------------*/ 2749 2750 EXTERN_C_BEGIN 2751 #undef __FUNCT__ 2752 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2753 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2754 { 2755 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2756 PetscErrorCode ierr; 2757 2758 PetscFunctionBegin; 2759 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2760 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2761 PetscFunctionReturn(0); 2762 } 2763 EXTERN_C_END 2764 2765 EXTERN_C_BEGIN 2766 #undef __FUNCT__ 2767 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2768 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2769 { 2770 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2771 PetscErrorCode ierr; 2772 2773 PetscFunctionBegin; 2774 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2775 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2776 PetscFunctionReturn(0); 2777 } 2778 EXTERN_C_END 2779 2780 EXTERN_C_BEGIN 2781 #undef __FUNCT__ 2782 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2783 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2784 { 2785 Mat_MPIAIJ *b; 2786 PetscErrorCode ierr; 2787 PetscInt 
i; 2788 2789 PetscFunctionBegin; 2790 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2791 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2792 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2793 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2794 2795 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2796 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2797 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2798 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2799 if (d_nnz) { 2800 for (i=0; i<B->rmap->n; i++) { 2801 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2802 } 2803 } 2804 if (o_nnz) { 2805 for (i=0; i<B->rmap->n; i++) { 2806 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2807 } 2808 } 2809 b = (Mat_MPIAIJ*)B->data; 2810 2811 if (!B->preallocated) { 2812 /* Explicitly create 2 MATSEQAIJ matrices. */ 2813 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2814 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2815 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2816 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2817 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2818 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2819 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2820 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2821 } 2822 2823 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2824 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2825 B->preallocated = PETSC_TRUE; 2826 PetscFunctionReturn(0); 2827 } 2828 EXTERN_C_END 2829 2830 #undef __FUNCT__ 2831 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2832 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2833 { 2834 Mat mat; 2835 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2836 PetscErrorCode ierr; 2837 2838 PetscFunctionBegin; 2839 *newmat = 0; 2840 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2841 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2842 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2843 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2844 a = (Mat_MPIAIJ*)mat->data; 2845 2846 mat->factortype = matin->factortype; 2847 mat->rmap->bs = matin->rmap->bs; 2848 mat->assembled = PETSC_TRUE; 2849 mat->insertmode = NOT_SET_VALUES; 2850 mat->preallocated = PETSC_TRUE; 2851 2852 a->size = oldmat->size; 2853 a->rank = oldmat->rank; 2854 a->donotstash = oldmat->donotstash; 2855 a->roworiented = oldmat->roworiented; 2856 a->rowindices = 0; 2857 a->rowvalues = 0; 2858 a->getrowactive = PETSC_FALSE; 2859 2860 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2861 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2862 2863 if (oldmat->colmap) { 2864 #if defined (PETSC_USE_CTABLE) 2865 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2866 #else 2867 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2868 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2869 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2870 #endif 2871 } else a->colmap = 0; 2872 if (oldmat->garray) { 2873 PetscInt len; 2874 len = oldmat->B->cmap->n; 2875 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2876 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2877 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2878 } else a->garray = 0; 2879 2880 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2881 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2882 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2883 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2884 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2885 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2886 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2887 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2888 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2889 *newmat = mat; 2890 PetscFunctionReturn(0); 2891 } 2892 2893 /* 2894 Allows sending/receiving larger messages then 2 gigabytes in a single call 2895 */ 2896 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm) 2897 { 2898 int ierr; 2899 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 2900 PetscInt i,numchunks; 2901 PetscMPIInt icnt; 2902 2903 numchunks = cnt/CHUNKSIZE + 1; 2904 for (i=0; i<numchunks; i++) { 2905 icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 2906 ierr = MPI_Send(mess,icnt,type,to,tag,comm); 2907 if (type == MPIU_INT) { 2908 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 2909 } else if (type == MPIU_SCALAR) { 2910 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 2911 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 2912 } 2913 return 0; 2914 } 2915 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm) 2916 { 2917 int ierr; 2918 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 2919 MPI_Status status; 2920 PetscInt i,numchunks; 2921 PetscMPIInt icnt; 2922 2923 numchunks = cnt/CHUNKSIZE + 1; 2924 for (i=0; i<numchunks; i++) { 2925 icnt = PetscMPIIntCast((i < numchunks-1) ? 
CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 2926 ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status); 2927 if (type == MPIU_INT) { 2928 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 2929 } else if (type == MPIU_SCALAR) { 2930 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 2931 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 2932 } 2933 return 0; 2934 } 2935 2936 #undef __FUNCT__ 2937 #define __FUNCT__ "MatLoad_MPIAIJ" 2938 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2939 { 2940 PetscScalar *vals,*svals; 2941 MPI_Comm comm = ((PetscObject)viewer)->comm; 2942 PetscErrorCode ierr; 2943 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2944 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 2945 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2946 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2947 PetscInt cend,cstart,n,*rowners,sizesset=1; 2948 int fd; 2949 2950 PetscFunctionBegin; 2951 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2952 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2953 if (!rank) { 2954 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2955 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2956 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2957 } 2958 2959 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 2960 2961 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2962 M = header[1]; N = header[2]; 2963 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2964 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 2965 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 2966 2967 /* If global sizes are set, check if they are consistent with that given in the file */ 2968 if (sizesset) { 2969 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 2970 } 2971 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2972 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2973 2974 /* determine ownership of all rows */ 2975 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 2976 else m = newMat->rmap->n; /* Set by user */ 2977 2978 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2979 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2980 2981 /* First process needs enough room for process with most rows */ 2982 if (!rank) { 2983 mmax = rowners[1]; 2984 for (i=2; i<size; i++) { 2985 mmax = PetscMax(mmax,rowners[i]); 2986 } 2987 } else mmax = m; 2988 2989 rowners[0] = 0; 2990 for (i=2; i<=size; i++) { 2991 rowners[i] += rowners[i-1]; 2992 } 2993 rstart = rowners[rank]; 2994 rend = rowners[rank+1]; 2995 2996 /* distribute row lengths to all processors */ 2997 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2998 if (!rank) { 2999 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3000 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3001 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3002 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3003 for (j=0; 
j<m; j++) { 3004 procsnz[0] += ourlens[j]; 3005 } 3006 for (i=1; i<size; i++) { 3007 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3008 /* calculate the number of nonzeros on each processor */ 3009 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3010 procsnz[i] += rowlengths[j]; 3011 } 3012 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3013 } 3014 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3015 } else { 3016 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3017 } 3018 3019 if (!rank) { 3020 /* determine max buffer needed and allocate it */ 3021 maxnz = 0; 3022 for (i=0; i<size; i++) { 3023 maxnz = PetscMax(maxnz,procsnz[i]); 3024 } 3025 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3026 3027 /* read in my part of the matrix column indices */ 3028 nz = procsnz[0]; 3029 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3030 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3031 3032 /* read in every one elses and ship off */ 3033 for (i=1; i<size; i++) { 3034 nz = procsnz[i]; 3035 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3036 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3037 } 3038 ierr = PetscFree(cols);CHKERRQ(ierr); 3039 } else { 3040 /* determine buffer space needed for message */ 3041 nz = 0; 3042 for (i=0; i<m; i++) { 3043 nz += ourlens[i]; 3044 } 3045 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3046 3047 /* receive message of column indices*/ 3048 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3049 } 3050 3051 /* determine column ownership if matrix is not square */ 3052 if (N != M) { 3053 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3054 else n = newMat->cmap->n; 3055 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3056 cstart = cend - n; 3057 } else { 3058 cstart = rstart; 3059 cend = rend; 3060 n = cend - cstart; 3061 } 3062 3063 /* loop over local rows, determining number of off diagonal entries */ 3064 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3065 jj = 0; 3066 for (i=0; i<m; i++) { 3067 for (j=0; j<ourlens[i]; j++) { 3068 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3069 jj++; 3070 } 3071 } 3072 3073 for (i=0; i<m; i++) { 3074 ourlens[i] -= offlens[i]; 3075 } 3076 if (!sizesset) { 3077 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3078 } 3079 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3080 3081 for (i=0; i<m; i++) { 3082 ourlens[i] += offlens[i]; 3083 } 3084 3085 if (!rank) { 3086 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3087 3088 /* read in my part of the matrix numerical values */ 3089 nz = procsnz[0]; 3090 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3091 3092 /* insert into matrix */ 3093 jj = rstart; 3094 smycols = mycols; 3095 svals = vals; 3096 for (i=0; i<m; i++) { 3097 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3098 smycols += ourlens[i]; 3099 svals += ourlens[i]; 3100 jj++; 3101 } 3102 3103 /* read in other processors and ship out */ 3104 for (i=1; i<size; i++) { 3105 nz = procsnz[i]; 3106 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3107 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3108 } 3109 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3110 } else { 3111 /* receive numeric values */ 3112 ierr 
= PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3113 3114 /* receive message of values*/ 3115 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3116 3117 /* insert into matrix */ 3118 jj = rstart; 3119 smycols = mycols; 3120 svals = vals; 3121 for (i=0; i<m; i++) { 3122 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3123 smycols += ourlens[i]; 3124 svals += ourlens[i]; 3125 jj++; 3126 } 3127 } 3128 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3129 ierr = PetscFree(vals);CHKERRQ(ierr); 3130 ierr = PetscFree(mycols);CHKERRQ(ierr); 3131 ierr = PetscFree(rowners);CHKERRQ(ierr); 3132 3133 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3134 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3135 PetscFunctionReturn(0); 3136 } 3137 3138 #undef __FUNCT__ 3139 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3140 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3141 { 3142 PetscErrorCode ierr; 3143 IS iscol_local; 3144 PetscInt csize; 3145 3146 PetscFunctionBegin; 3147 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3148 if (call == MAT_REUSE_MATRIX) { 3149 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3150 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3151 } else { 3152 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3153 } 3154 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3155 if (call == MAT_INITIAL_MATRIX) { 3156 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3157 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3158 } 3159 PetscFunctionReturn(0); 3160 } 3161 3162 #undef __FUNCT__ 3163 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3164 /* 3165 Not great since it makes two copies of the submatrix, first an SeqAIJ 3166 in local and then by concatenating the local matrices the end result. 3167 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3168 3169 Note: This requires a sequential iscol with all indices. 
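   For reference, the caller MatGetSubMatrix_MPIAIJ() above builds such an index set by
   gathering the parallel iscol onto every process; a sketch of that call sequence
   (error handling abbreviated) is

      ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
      ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
      ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);

   where csize is the local size of the original iscol.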
3170 */ 3171 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3172 { 3173 PetscErrorCode ierr; 3174 PetscMPIInt rank,size; 3175 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3176 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3177 Mat *local,M,Mreuse; 3178 MatScalar *vwork,*aa; 3179 MPI_Comm comm = ((PetscObject)mat)->comm; 3180 Mat_SeqAIJ *aij; 3181 3182 3183 PetscFunctionBegin; 3184 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3185 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3186 3187 if (call == MAT_REUSE_MATRIX) { 3188 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3189 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3190 local = &Mreuse; 3191 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3192 } else { 3193 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3194 Mreuse = *local; 3195 ierr = PetscFree(local);CHKERRQ(ierr); 3196 } 3197 3198 /* 3199 m - number of local rows 3200 n - number of columns (same on all processors) 3201 rstart - first row in new global matrix generated 3202 */ 3203 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3204 if (call == MAT_INITIAL_MATRIX) { 3205 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3206 ii = aij->i; 3207 jj = aij->j; 3208 3209 /* 3210 Determine the number of non-zeros in the diagonal and off-diagonal 3211 portions of the matrix in order to do correct preallocation 3212 */ 3213 3214 /* first get start and end of "diagonal" columns */ 3215 if (csize == PETSC_DECIDE) { 3216 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3217 if (mglobal == n) { /* square matrix */ 3218 nlocal = m; 3219 } else { 3220 nlocal = n/size + ((n % size) > rank); 3221 } 3222 } else { 3223 nlocal = csize; 3224 } 3225 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3226 rstart = rend - nlocal; 3227 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3228 3229 /* next, compute all the lengths */ 3230 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3231 olens = dlens + m; 3232 for (i=0; i<m; i++) { 3233 jend = ii[i+1] - ii[i]; 3234 olen = 0; 3235 dlen = 0; 3236 for (j=0; j<jend; j++) { 3237 if (*jj < rstart || *jj >= rend) olen++; 3238 else dlen++; 3239 jj++; 3240 } 3241 olens[i] = olen; 3242 dlens[i] = dlen; 3243 } 3244 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3245 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3246 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3247 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3248 ierr = PetscFree(dlens);CHKERRQ(ierr); 3249 } else { 3250 PetscInt ml,nl; 3251 3252 M = *newmat; 3253 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3254 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3255 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3256 /* 3257 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3258 rather than the slower MatSetValues(). 
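      (Presumably the flags mark M as a previously assembled matrix that is being
      re-assembled, so the insertion below reuses the existing nonzero pattern and
      communication data rather than treating this as a first-time assembly.)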
3259 */
3260 M->was_assembled = PETSC_TRUE;
3261 M->assembled = PETSC_FALSE;
3262 }
3263 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3264 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3265 ii = aij->i;
3266 jj = aij->j;
3267 aa = aij->a;
3268 for (i=0; i<m; i++) {
3269 row = rstart + i;
3270 nz = ii[i+1] - ii[i];
3271 cwork = jj; jj += nz;
3272 vwork = aa; aa += nz;
3273 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3274 }
3275
3276 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3277 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3278 *newmat = M;
3279
3280 /* save submatrix used in processor for next request */
3281 if (call == MAT_INITIAL_MATRIX) {
3282 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3283 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3284 }
3285
3286 PetscFunctionReturn(0);
3287 }
3288
3289 EXTERN_C_BEGIN
3290 #undef __FUNCT__
3291 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3292 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3293 {
3294 PetscInt m,cstart, cend,j,nnz,i,d;
3295 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3296 const PetscInt *JJ;
3297 PetscScalar *values;
3298 PetscErrorCode ierr;
3299
3300 PetscFunctionBegin;
3301 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3302
3303 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3304 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3305 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3306 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3307 m = B->rmap->n;
3308 cstart = B->cmap->rstart;
3309 cend = B->cmap->rend;
3310 rstart = B->rmap->rstart;
3311
3312 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3313
3314 #if defined(PETSC_USE_DEBUG)
3315 for (i=0; i<m; i++) {
3316 nnz = Ii[i+1]- Ii[i];
3317 JJ = J + Ii[i];
3318 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3319 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3320 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3321 }
3322 #endif
3323
3324 for (i=0; i<m; i++) {
3325 nnz = Ii[i+1]- Ii[i];
3326 JJ = J + Ii[i];
3327 nnz_max = PetscMax(nnz_max,nnz);
3328 d = 0;
3329 for (j=0; j<nnz; j++) {
3330 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3331 }
3332 d_nnz[i] = d;
3333 o_nnz[i] = nnz - d;
3334 }
3335 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3336 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3337
3338 if (v) values = (PetscScalar*)v;
3339 else {
3340 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3341 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3342 }
3343
3344 for (i=0; i<m; i++) {
3345 ii = i + rstart;
3346 nnz = Ii[i+1]- Ii[i];
3347 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3348 } 3349 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3350 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3351 3352 if (!v) { 3353 ierr = PetscFree(values);CHKERRQ(ierr); 3354 } 3355 PetscFunctionReturn(0); 3356 } 3357 EXTERN_C_END 3358 3359 #undef __FUNCT__ 3360 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3361 /*@ 3362 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3363 (the default parallel PETSc format). 3364 3365 Collective on MPI_Comm 3366 3367 Input Parameters: 3368 + B - the matrix 3369 . i - the indices into j for the start of each local row (starts with zero) 3370 . j - the column indices for each local row (starts with zero) 3371 - v - optional values in the matrix 3372 3373 Level: developer 3374 3375 Notes: 3376 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3377 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3378 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3379 3380 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3381 3382 The format which is used for the sparse matrix input, is equivalent to a 3383 row-major ordering.. i.e for the following matrix, the input data expected is 3384 as shown: 3385 3386 1 0 0 3387 2 0 3 P0 3388 ------- 3389 4 5 6 P1 3390 3391 Process0 [P0]: rows_owned=[0,1] 3392 i = {0,1,3} [size = nrow+1 = 2+1] 3393 j = {0,0,2} [size = nz = 6] 3394 v = {1,2,3} [size = nz = 6] 3395 3396 Process1 [P1]: rows_owned=[2] 3397 i = {0,3} [size = nrow+1 = 1+1] 3398 j = {0,1,2} [size = nz = 6] 3399 v = {4,5,6} [size = nz = 6] 3400 3401 .keywords: matrix, aij, compressed row, sparse, parallel 3402 3403 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3404 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3405 @*/ 3406 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3407 { 3408 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3409 3410 PetscFunctionBegin; 3411 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3412 if (f) { 3413 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3414 } 3415 PetscFunctionReturn(0); 3416 } 3417 3418 #undef __FUNCT__ 3419 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3420 /*@C 3421 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3422 (the default parallel PETSc format). For good matrix assembly performance 3423 the user should preallocate the matrix storage by setting the parameters 3424 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3425 performance can be increased by more than a factor of 50. 3426 3427 Collective on MPI_Comm 3428 3429 Input Parameters: 3430 + A - the matrix 3431 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3432 (same value is used for all local rows) 3433 . d_nnz - array containing the number of nonzeros in the various rows of the 3434 DIAGONAL portion of the local submatrix (possibly different for each row) 3435 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3436 The size of this array is equal to the number of local rows, i.e 'm'. 
3437 You must leave room for the diagonal entry even if it is zero. 3438 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3439 submatrix (same value is used for all local rows). 3440 - o_nnz - array containing the number of nonzeros in the various rows of the 3441 OFF-DIAGONAL portion of the local submatrix (possibly different for 3442 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3443 structure. The size of this array is equal to the number 3444 of local rows, i.e 'm'. 3445 3446 If the *_nnz parameter is given then the *_nz parameter is ignored 3447 3448 The AIJ format (also called the Yale sparse matrix format or 3449 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3450 storage. The stored row and column indices begin with zero. 3451 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3452 3453 The parallel matrix is partitioned such that the first m0 rows belong to 3454 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3455 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3456 3457 The DIAGONAL portion of the local submatrix of a processor can be defined 3458 as the submatrix which is obtained by extraction the part corresponding to 3459 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3460 first row that belongs to the processor, r2 is the last row belonging to 3461 the this processor, and c1-c2 is range of indices of the local part of a 3462 vector suitable for applying the matrix to. This is an mxn matrix. In the 3463 common case of a square matrix, the row and column ranges are the same and 3464 the DIAGONAL part is also square. The remaining portion of the local 3465 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3466 3467 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3468 3469 You can call MatGetInfo() to get information on how effective the preallocation was; 3470 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3471 You can also run with the option -info and look for messages with the string 3472 malloc in them to see if additional memory allocation was needed. 3473 3474 Example usage: 3475 3476 Consider the following 8x8 matrix with 34 non-zero values, that is 3477 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3478 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3479 as follows: 3480 3481 .vb 3482 1 2 0 | 0 3 0 | 0 4 3483 Proc0 0 5 6 | 7 0 0 | 8 0 3484 9 0 10 | 11 0 0 | 12 0 3485 ------------------------------------- 3486 13 0 14 | 15 16 17 | 0 0 3487 Proc1 0 18 0 | 19 20 21 | 0 0 3488 0 0 0 | 22 23 0 | 24 0 3489 ------------------------------------- 3490 Proc2 25 26 27 | 0 0 28 | 29 0 3491 30 0 0 | 31 32 33 | 0 34 3492 .ve 3493 3494 This can be represented as a collection of submatrices as: 3495 3496 .vb 3497 A B C 3498 D E F 3499 G H I 3500 .ve 3501 3502 Where the submatrices A,B,C are owned by proc0, D,E,F are 3503 owned by proc1, G,H,I are owned by proc2. 3504 3505 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3506 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3507 The 'M','N' parameters are 8,8, and have the same values on all procs. 3508 3509 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3510 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3511 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3512 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3513 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3514 matrix, and [DF] as another SeqAIJ matrix.
3515
3516 When d_nz, o_nz parameters are specified, d_nz storage elements are
3517 allocated for every row of the local diagonal submatrix, and o_nz
3518 storage locations are allocated for every row of the OFF-DIAGONAL submat.
3519 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3520 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3521 In this case, the values of d_nz,o_nz are:
3522 .vb
3523 proc0 : d_nz = 2, o_nz = 2
3524 proc1 : d_nz = 3, o_nz = 2
3525 proc2 : d_nz = 1, o_nz = 4
3526 .ve
3527 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3528 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3529 for proc2, i.e., we are using 12+15+10=37 storage locations to store
3530 34 values.
3531
3532 When d_nnz, o_nnz parameters are specified, the storage is specified
3533 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3534 In the above case the values for d_nnz,o_nnz are:
3535 .vb
3536 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3537 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3538 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3539 .ve
3540 Here the space allocated is the sum of all the above values, i.e., 34, and
3541 hence the preallocation is perfect.
3542
3543 Level: intermediate
3544
3545 .keywords: matrix, aij, compressed row, sparse, parallel
3546
3547 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3548 MPIAIJ, MatGetInfo()
3549 @*/
3550 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3551 {
3552 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3553
3554 PetscFunctionBegin;
3555 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3556 if (f) {
3557 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3558 }
3559 PetscFunctionReturn(0);
3560 }
3561
3562 #undef __FUNCT__
3563 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3564 /*@
3565 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
3566 rows in standard CSR format.
3567
3568 Collective on MPI_Comm
3569
3570 Input Parameters:
3571 + comm - MPI communicator
3572 . m - number of local rows (Cannot be PETSC_DECIDE)
3573 . n - This value should be the same as the local size used in creating the
3574 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3575 calculated if N is given) For square matrices n is almost always m.
3576 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3577 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3578 . i - row indices
3579 . j - column indices
3580 - a - matrix values
3581
3582 Output Parameter:
3583 . mat - the matrix
3584
3585 Level: intermediate
3586
3587 Notes:
3588 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3589 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3590 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3591
3592 The i and j indices are 0 based, and the i indices are offsets into the local j array.
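   As a sketch (the names comm, m, n, i, j, and a are illustrative), once each process
   has filled its local CSR arrays as in the example below, the matrix is created with
   a single call:

.vb
     Mat A;
     ierr = MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve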
3593 3594 The format which is used for the sparse matrix input, is equivalent to a 3595 row-major ordering.. i.e for the following matrix, the input data expected is 3596 as shown: 3597 3598 1 0 0 3599 2 0 3 P0 3600 ------- 3601 4 5 6 P1 3602 3603 Process0 [P0]: rows_owned=[0,1] 3604 i = {0,1,3} [size = nrow+1 = 2+1] 3605 j = {0,0,2} [size = nz = 6] 3606 v = {1,2,3} [size = nz = 6] 3607 3608 Process1 [P1]: rows_owned=[2] 3609 i = {0,3} [size = nrow+1 = 1+1] 3610 j = {0,1,2} [size = nz = 6] 3611 v = {4,5,6} [size = nz = 6] 3612 3613 .keywords: matrix, aij, compressed row, sparse, parallel 3614 3615 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3616 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3617 @*/ 3618 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3619 { 3620 PetscErrorCode ierr; 3621 3622 PetscFunctionBegin; 3623 if (i[0]) { 3624 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3625 } 3626 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3627 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3628 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3629 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3630 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3631 PetscFunctionReturn(0); 3632 } 3633 3634 #undef __FUNCT__ 3635 #define __FUNCT__ "MatCreateMPIAIJ" 3636 /*@C 3637 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3638 (the default parallel PETSc format). For good matrix assembly performance 3639 the user should preallocate the matrix storage by setting the parameters 3640 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3641 performance can be increased by more than a factor of 50. 3642 3643 Collective on MPI_Comm 3644 3645 Input Parameters: 3646 + comm - MPI communicator 3647 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3648 This value should be the same as the local size used in creating the 3649 y vector for the matrix-vector product y = Ax. 3650 . n - This value should be the same as the local size used in creating the 3651 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3652 calculated if N is given) For square matrices n is almost always m. 3653 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3654 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3655 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3656 (same value is used for all local rows) 3657 . d_nnz - array containing the number of nonzeros in the various rows of the 3658 DIAGONAL portion of the local submatrix (possibly different for each row) 3659 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3660 The size of this array is equal to the number of local rows, i.e 'm'. 3661 You must leave room for the diagonal entry even if it is zero. 3662 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3663 submatrix (same value is used for all local rows). 
3664 - o_nnz - array containing the number of nonzeros in the various rows of the
3665 OFF-DIAGONAL portion of the local submatrix (possibly different for
3666 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3667 structure. The size of this array is equal to the number
3668 of local rows, i.e 'm'.
3669
3670 Output Parameter:
3671 . A - the matrix
3672
3673 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3674 MatXXXXSetPreallocation() paradigm instead of this routine directly.
3675 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3676
3677 Notes:
3678 If the *_nnz parameter is given then the *_nz parameter is ignored
3679
3680 m,n,M,N parameters specify the size of the matrix, and its partitioning across
3681 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3682 storage requirements for this matrix.
3683
3684 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3685 processor then it must be used on all processors that share the object for
3686 that argument.
3687
3688 The user MUST specify either the local or global matrix dimensions
3689 (possibly both).
3690
3691 The parallel matrix is partitioned across processors such that the
3692 first m0 rows belong to process 0, the next m1 rows belong to
3693 process 1, the next m2 rows belong to process 2, etc., where
3694 m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
3695 values corresponding to an [m x N] submatrix.
3696
3697 The columns are logically partitioned with the n0 columns belonging
3698 to the 0th partition, the next n1 columns belonging to the next
3699 partition, etc., where n0,n1,n2... are the input parameter 'n'.
3700
3701 The DIAGONAL portion of the local submatrix on any given processor
3702 is the submatrix corresponding to the rows and columns m,n
3703 corresponding to the given processor, i.e., the diagonal matrix on
3704 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3705 etc. The remaining portion of the local submatrix [m x (N-n)]
3706 constitutes the OFF-DIAGONAL portion. The example below better
3707 illustrates this concept.
3708
3709 For a square global matrix we define each processor's diagonal portion
3710 to be its local rows and the corresponding columns (a square submatrix);
3711 each processor's off-diagonal portion encompasses the remainder of the
3712 local matrix (a rectangular submatrix).
3713
3714 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3715
3716 When calling this routine with a single process communicator, a matrix of
3717 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
3718 type of communicator, use the construction mechanism:
3719 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3720
3721 By default, this format uses inodes (identical nodes) when possible.
3722 We search for consecutive rows with the same nonzero structure, thereby
3723 reusing matrix information to achieve increased efficiency.
3724
3725 Options Database Keys:
3726 + -mat_no_inode - Do not use inodes
3727 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3728 - -mat_aij_oneindex - Internally use indexing starting at 1
3729 rather than 0. Note that when calling MatSetValues(),
3730 the user still MUST index entries starting at 0!
3731
3732
3733 Example usage:
3734
3735 Consider the following 8x8 matrix with 34 non-zero values that is
3736 assembled across 3 processors.
Let's assume that proc0 owns 3 rows,
3737 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3738 as follows:
3739
3740 .vb
3741 1 2 0 | 0 3 0 | 0 4
3742 Proc0 0 5 6 | 7 0 0 | 8 0
3743 9 0 10 | 11 0 0 | 12 0
3744 -------------------------------------
3745 13 0 14 | 15 16 17 | 0 0
3746 Proc1 0 18 0 | 19 20 21 | 0 0
3747 0 0 0 | 22 23 0 | 24 0
3748 -------------------------------------
3749 Proc2 25 26 27 | 0 0 28 | 29 0
3750 30 0 0 | 31 32 33 | 0 34
3751 .ve
3752
3753 This can be represented as a collection of submatrices as:
3754
3755 .vb
3756 A B C
3757 D E F
3758 G H I
3759 .ve
3760
3761 Where the submatrices A,B,C are owned by proc0, D,E,F are
3762 owned by proc1, G,H,I are owned by proc2.
3763
3764 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3765 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3766 The 'M','N' parameters are 8,8, and have the same values on all procs.
3767
3768 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3769 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3770 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3771 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3772 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3773 matrix, and [DF] as another SeqAIJ matrix.
3774
3775 When d_nz, o_nz parameters are specified, d_nz storage elements are
3776 allocated for every row of the local diagonal submatrix, and o_nz
3777 storage locations are allocated for every row of the OFF-DIAGONAL submat.
3778 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3779 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3780 In this case, the values of d_nz,o_nz are:
3781 .vb
3782 proc0 : d_nz = 2, o_nz = 2
3783 proc1 : d_nz = 3, o_nz = 2
3784 proc2 : d_nz = 1, o_nz = 4
3785 .ve
3786 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3787 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3788 for proc2, i.e., we are using 12+15+10=37 storage locations to store
3789 34 values.
3790
3791 When d_nnz, o_nnz parameters are specified, the storage is specified
3792 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3793 In the above case the values for d_nnz,o_nnz are:
3794 .vb
3795 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3796 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3797 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3798 .ve
3799 Here the space allocated is the sum of all the above values, i.e., 34, and
3800 hence the preallocation is perfect.
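   A minimal calling sketch for the example above, shown for proc0 (which owns 3 of the
   8 rows); the MatSetValues() loop and error handling are omitted, and each process
   would pass its own local sizes and nnz arrays:

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* ... MatSetValues() on the locally owned rows ... */
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve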
3801 3802 Level: intermediate 3803 3804 .keywords: matrix, aij, compressed row, sparse, parallel 3805 3806 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3807 MPIAIJ, MatCreateMPIAIJWithArrays() 3808 @*/ 3809 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3810 { 3811 PetscErrorCode ierr; 3812 PetscMPIInt size; 3813 3814 PetscFunctionBegin; 3815 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3816 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3817 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3818 if (size > 1) { 3819 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3820 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3821 } else { 3822 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3823 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3824 } 3825 PetscFunctionReturn(0); 3826 } 3827 3828 #undef __FUNCT__ 3829 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3830 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3831 { 3832 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3833 3834 PetscFunctionBegin; 3835 *Ad = a->A; 3836 *Ao = a->B; 3837 *colmap = a->garray; 3838 PetscFunctionReturn(0); 3839 } 3840 3841 #undef __FUNCT__ 3842 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3843 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3844 { 3845 PetscErrorCode ierr; 3846 PetscInt i; 3847 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3848 3849 PetscFunctionBegin; 3850 if (coloring->ctype == IS_COLORING_GLOBAL) { 3851 ISColoringValue *allcolors,*colors; 3852 ISColoring ocoloring; 3853 3854 /* set coloring for diagonal portion */ 3855 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3856 3857 /* set coloring for off-diagonal portion */ 3858 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3859 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3860 for (i=0; i<a->B->cmap->n; i++) { 3861 colors[i] = allcolors[a->garray[i]]; 3862 } 3863 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3864 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3865 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3866 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3867 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3868 ISColoringValue *colors; 3869 PetscInt *larray; 3870 ISColoring ocoloring; 3871 3872 /* set coloring for diagonal portion */ 3873 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3874 for (i=0; i<a->A->cmap->n; i++) { 3875 larray[i] = i + A->cmap->rstart; 3876 } 3877 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3878 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3879 for (i=0; i<a->A->cmap->n; i++) { 3880 colors[i] = coloring->colors[larray[i]]; 3881 } 3882 ierr = PetscFree(larray);CHKERRQ(ierr); 3883 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3884 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3885 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3886 3887 /* set coloring for off-diagonal portion */ 3888 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3889 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3890 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3891 for (i=0; i<a->B->cmap->n; i++) { 3892 colors[i] = coloring->colors[larray[i]]; 3893 } 3894 ierr = PetscFree(larray);CHKERRQ(ierr); 3895 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3896 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3897 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3898 } else { 3899 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3900 } 3901 3902 PetscFunctionReturn(0); 3903 } 3904 3905 #if defined(PETSC_HAVE_ADIC) 3906 #undef __FUNCT__ 3907 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3908 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3909 { 3910 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3911 PetscErrorCode ierr; 3912 3913 PetscFunctionBegin; 3914 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3915 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3916 PetscFunctionReturn(0); 3917 } 3918 #endif 3919 3920 #undef __FUNCT__ 3921 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3922 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3923 { 3924 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3925 PetscErrorCode ierr; 3926 3927 PetscFunctionBegin; 3928 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3929 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3930 PetscFunctionReturn(0); 3931 } 3932 3933 #undef __FUNCT__ 3934 #define __FUNCT__ "MatMerge" 3935 /*@ 3936 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3937 matrices from each processor 3938 3939 Collective on MPI_Comm 3940 3941 Input Parameters: 3942 + comm - the communicators the parallel matrix will live on 3943 . inmat - the input sequential matrices 3944 . n - number of local columns (or PETSC_DECIDE) 3945 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3946 3947 Output Parameter: 3948 . outmat - the parallel matrix generated 3949 3950 Level: advanced 3951 3952 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
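   The input matrix inmat is destroyed by this routine (note the MatDestroy() call in the
   implementation below), so it must not be used after the call.

   A minimal usage sketch (seqA is each process's sequential input matrix, assumed to
   already exist):

.vb
     Mat merged;
     ierr = MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&merged);CHKERRQ(ierr);
.ve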
3953 3954 @*/ 3955 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3956 { 3957 PetscErrorCode ierr; 3958 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3959 PetscInt *indx; 3960 PetscScalar *values; 3961 3962 PetscFunctionBegin; 3963 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3964 if (scall == MAT_INITIAL_MATRIX){ 3965 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3966 if (n == PETSC_DECIDE){ 3967 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3968 } 3969 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3970 rstart -= m; 3971 3972 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3973 for (i=0;i<m;i++) { 3974 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3975 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3976 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3977 } 3978 /* This routine will ONLY return MPIAIJ type matrix */ 3979 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3980 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3981 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3982 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3983 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3984 3985 } else if (scall == MAT_REUSE_MATRIX){ 3986 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3987 } else { 3988 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3989 } 3990 3991 for (i=0;i<m;i++) { 3992 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3993 Ii = i + rstart; 3994 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3995 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3996 } 3997 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3998 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3999 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4000 4001 PetscFunctionReturn(0); 4002 } 4003 4004 #undef __FUNCT__ 4005 #define __FUNCT__ "MatFileSplit" 4006 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4007 { 4008 PetscErrorCode ierr; 4009 PetscMPIInt rank; 4010 PetscInt m,N,i,rstart,nnz; 4011 size_t len; 4012 const PetscInt *indx; 4013 PetscViewer out; 4014 char *name; 4015 Mat B; 4016 const PetscScalar *values; 4017 4018 PetscFunctionBegin; 4019 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4020 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4021 /* Should this be the type of the diagonal block of A? 
*/ 4022 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4023 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4024 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4025 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4026 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4027 for (i=0;i<m;i++) { 4028 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4029 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4030 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4031 } 4032 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4033 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4034 4035 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4036 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4037 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4038 sprintf(name,"%s.%d",outfile,rank); 4039 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4040 ierr = PetscFree(name); 4041 ierr = MatView(B,out);CHKERRQ(ierr); 4042 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 4043 ierr = MatDestroy(B);CHKERRQ(ierr); 4044 PetscFunctionReturn(0); 4045 } 4046 4047 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 4048 #undef __FUNCT__ 4049 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4050 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4051 { 4052 PetscErrorCode ierr; 4053 Mat_Merge_SeqsToMPI *merge; 4054 PetscContainer container; 4055 4056 PetscFunctionBegin; 4057 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4058 if (container) { 4059 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4060 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4061 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4062 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4063 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4064 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4065 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4066 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4067 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4068 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4069 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4070 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4071 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4072 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4073 4074 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4075 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4076 } 4077 ierr = PetscFree(merge);CHKERRQ(ierr); 4078 4079 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4080 PetscFunctionReturn(0); 4081 } 4082 4083 #include "../src/mat/utils/freespace.h" 4084 #include "petscbt.h" 4085 4086 #undef __FUNCT__ 4087 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4088 /*@C 4089 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4090 matrices from each processor 4091 4092 Collective on MPI_Comm 4093 4094 Input Parameters: 4095 + comm - the communicators the parallel matrix will live on 4096 . seqmat - the input sequential matrices 4097 . m - number of local rows (or PETSC_DECIDE) 4098 . n - number of local columns (or PETSC_DECIDE) 4099 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4100 4101 Output Parameter: 4102 . mpimat - the parallel matrix generated 4103 4104 Level: advanced 4105 4106 Notes: 4107 The dimensions of the sequential matrix in each processor MUST be the same. 
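   A typical calling pattern (a sketch; comm, seqA, and the local sizes m and n are assumed
   to be defined by the caller):

.vb
     Mat C;
     ierr = MatMerge_SeqsToMPI(comm,seqA,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     /* later, after refilling seqA with new values in the same nonzero pattern */
     ierr = MatMerge_SeqsToMPI(comm,seqA,m,n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
.ve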
4108 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4109 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4110 @*/ 4111 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4112 { 4113 PetscErrorCode ierr; 4114 MPI_Comm comm=((PetscObject)mpimat)->comm; 4115 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4116 PetscMPIInt size,rank,taga,*len_s; 4117 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4118 PetscInt proc,m; 4119 PetscInt **buf_ri,**buf_rj; 4120 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4121 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4122 MPI_Request *s_waits,*r_waits; 4123 MPI_Status *status; 4124 MatScalar *aa=a->a; 4125 MatScalar **abuf_r,*ba_i; 4126 Mat_Merge_SeqsToMPI *merge; 4127 PetscContainer container; 4128 4129 PetscFunctionBegin; 4130 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4131 4132 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4133 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4134 4135 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4136 if (container) { 4137 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4138 } 4139 bi = merge->bi; 4140 bj = merge->bj; 4141 buf_ri = merge->buf_ri; 4142 buf_rj = merge->buf_rj; 4143 4144 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4145 owners = merge->rowmap->range; 4146 len_s = merge->len_s; 4147 4148 /* send and recv matrix values */ 4149 /*-----------------------------*/ 4150 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4151 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4152 4153 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4154 for (proc=0,k=0; proc<size; proc++){ 4155 if (!len_s[proc]) continue; 4156 i = owners[proc]; 4157 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4158 k++; 4159 } 4160 4161 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4162 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4163 ierr = PetscFree(status);CHKERRQ(ierr); 4164 4165 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4166 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4167 4168 /* insert mat values of mpimat */ 4169 /*----------------------------*/ 4170 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4171 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4172 4173 for (k=0; k<merge->nrecv; k++){ 4174 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4175 nrows = *(buf_ri_k[k]); 4176 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4177 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4178 } 4179 4180 /* set values of ba */ 4181 m = merge->rowmap->n; 4182 for (i=0; i<m; i++) { 4183 arow = owners[rank] + i; 4184 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4185 bnzi = bi[i+1] - bi[i]; 4186 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4187 4188 /* add local non-zero vals of this proc's seqmat into ba */ 4189 anzi = ai[arow+1] - ai[arow]; 4190 aj = a->j + ai[arow]; 4191 aa = a->a + ai[arow]; 4192 nextaj = 0; 4193 for (j=0; nextaj<anzi; j++){ 4194 if (*(bj_i 
+ j) == aj[nextaj]){ /* bcol == acol */ 4195 ba_i[j] += aa[nextaj++]; 4196 } 4197 } 4198 4199 /* add received vals into ba */ 4200 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4201 /* i-th row */ 4202 if (i == *nextrow[k]) { 4203 anzi = *(nextai[k]+1) - *nextai[k]; 4204 aj = buf_rj[k] + *(nextai[k]); 4205 aa = abuf_r[k] + *(nextai[k]); 4206 nextaj = 0; 4207 for (j=0; nextaj<anzi; j++){ 4208 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4209 ba_i[j] += aa[nextaj++]; 4210 } 4211 } 4212 nextrow[k]++; nextai[k]++; 4213 } 4214 } 4215 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4216 } 4217 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4218 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4219 4220 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4221 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4222 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4223 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4224 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4225 PetscFunctionReturn(0); 4226 } 4227 4228 #undef __FUNCT__ 4229 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4230 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4231 { 4232 PetscErrorCode ierr; 4233 Mat B_mpi; 4234 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4235 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4236 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4237 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4238 PetscInt len,proc,*dnz,*onz; 4239 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4240 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4241 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4242 MPI_Status *status; 4243 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4244 PetscBT lnkbt; 4245 Mat_Merge_SeqsToMPI *merge; 4246 PetscContainer container; 4247 4248 PetscFunctionBegin; 4249 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4250 4251 /* make sure it is a PETSc comm */ 4252 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4253 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4254 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4255 4256 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4257 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4258 4259 /* determine row ownership */ 4260 /*---------------------------------------------------------*/ 4261 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4262 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4263 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4264 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4265 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4266 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4267 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4268 4269 m = merge->rowmap->n; 4270 M = merge->rowmap->N; 4271 owners = merge->rowmap->range; 4272 4273 /* determine the number of messages to send, their lengths */ 4274 /*---------------------------------------------------------*/ 4275 len_s = merge->len_s; 4276 4277 len = 0; /* length of buf_si[] */ 4278 merge->nsend = 0; 4279 for (proc=0; proc<size; proc++){ 4280 len_si[proc] = 0; 4281 if (proc == rank){ 4282 len_s[proc] = 0; 4283 } else { 4284 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4285 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4286 } 4287 if (len_s[proc]) { 4288 merge->nsend++; 4289 nrows = 0; 4290 for (i=owners[proc]; i<owners[proc+1]; i++){ 4291 if (ai[i+1] > ai[i]) nrows++; 4292 } 4293 len_si[proc] = 2*(nrows+1); 4294 len += len_si[proc]; 4295 } 4296 } 4297 4298 /* determine the number and length of messages to receive for ij-structure */ 4299 /*-------------------------------------------------------------------------*/ 4300 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4301 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4302 4303 /* post the Irecv of j-structure */ 4304 /*-------------------------------*/ 4305 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4306 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4307 4308 /* post the Isend of j-structure */ 4309 /*--------------------------------*/ 4310 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4311 4312 for (proc=0, k=0; proc<size; proc++){ 4313 if (!len_s[proc]) continue; 4314 i = owners[proc]; 4315 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4316 k++; 4317 } 4318 4319 /* receives and sends of j-structure are complete */ 4320 /*------------------------------------------------*/ 4321 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4322 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4323 4324 /* send and recv i-structure */ 4325 /*---------------------------*/ 4326 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4327 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4328 4329 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4330 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4331 for (proc=0,k=0; proc<size; proc++){ 4332 if (!len_s[proc]) continue; 4333 /* form outgoing message for i-structure: 4334 buf_si[0]: nrows to be sent 4335 [1:nrows]: row index (global) 4336 [nrows+1:2*nrows+1]: i-structure index 4337 */ 4338 /*-------------------------------------------*/ 4339 nrows = len_si[proc]/2 - 1; 4340 buf_si_i = buf_si + nrows+1; 4341 buf_si[0] = nrows; 4342 buf_si_i[0] = 0; 4343 nrows = 0; 4344 for (i=owners[proc]; i<owners[proc+1]; i++){ 4345 anzi = ai[i+1] - ai[i]; 4346 if (anzi) { 4347 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4348 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4349 nrows++; 4350 } 4351 } 4352 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4353 k++; 4354 buf_si += len_si[proc]; 4355 } 4356 4357 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4358 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4359 4360 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4361 for (i=0; i<merge->nrecv; i++){ 4362 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4363 } 4364 4365 ierr = PetscFree(len_si);CHKERRQ(ierr); 4366 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4367 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4368 ierr = 
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4369 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4370 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4371 ierr = PetscFree(status);CHKERRQ(ierr); 4372 4373 /* compute a local seq matrix in each processor */ 4374 /*----------------------------------------------*/ 4375 /* allocate bi array and free space for accumulating nonzero column info */ 4376 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4377 bi[0] = 0; 4378 4379 /* create and initialize a linked list */ 4380 nlnk = N+1; 4381 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4382 4383 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4384 len = 0; 4385 len = ai[owners[rank+1]] - ai[owners[rank]]; 4386 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4387 current_space = free_space; 4388 4389 /* determine symbolic info for each local row */ 4390 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4391 4392 for (k=0; k<merge->nrecv; k++){ 4393 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4394 nrows = *buf_ri_k[k]; 4395 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4396 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4397 } 4398 4399 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4400 len = 0; 4401 for (i=0;i<m;i++) { 4402 bnzi = 0; 4403 /* add local non-zero cols of this proc's seqmat into lnk */ 4404 arow = owners[rank] + i; 4405 anzi = ai[arow+1] - ai[arow]; 4406 aj = a->j + ai[arow]; 4407 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4408 bnzi += nlnk; 4409 /* add received col data into lnk */ 4410 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4411 if (i == *nextrow[k]) { /* i-th row */ 4412 anzi = *(nextai[k]+1) - *nextai[k]; 4413 aj = buf_rj[k] + *nextai[k]; 4414 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4415 bnzi += nlnk; 4416 nextrow[k]++; nextai[k]++; 4417 } 4418 } 4419 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4420 4421 /* if free space is not available, make more free space */ 4422 if (current_space->local_remaining<bnzi) { 4423 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4424 nspacedouble++; 4425 } 4426 /* copy data into free space, then initialize lnk */ 4427 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4428 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4429 4430 current_space->array += bnzi; 4431 current_space->local_used += bnzi; 4432 current_space->local_remaining -= bnzi; 4433 4434 bi[i+1] = bi[i] + bnzi; 4435 } 4436 4437 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4438 4439 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4440 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4441 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4442 4443 /* create symbolic parallel matrix B_mpi */ 4444 /*---------------------------------------*/ 4445 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4446 if (n==PETSC_DECIDE) { 4447 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4448 } else { 4449 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4450 } 4451 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4452 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4453 ierr = 
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4454 4455 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4456 B_mpi->assembled = PETSC_FALSE; 4457 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4458 merge->bi = bi; 4459 merge->bj = bj; 4460 merge->buf_ri = buf_ri; 4461 merge->buf_rj = buf_rj; 4462 merge->coi = PETSC_NULL; 4463 merge->coj = PETSC_NULL; 4464 merge->owners_co = PETSC_NULL; 4465 4466 /* attach the supporting struct to B_mpi for reuse */ 4467 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4468 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4469 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4470 *mpimat = B_mpi; 4471 4472 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4473 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4474 PetscFunctionReturn(0); 4475 } 4476 4477 #undef __FUNCT__ 4478 #define __FUNCT__ "MatMerge_SeqsToMPI" 4479 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4480 { 4481 PetscErrorCode ierr; 4482 4483 PetscFunctionBegin; 4484 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4485 if (scall == MAT_INITIAL_MATRIX){ 4486 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4487 } 4488 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4489 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4490 PetscFunctionReturn(0); 4491 } 4492 4493 #undef __FUNCT__ 4494 #define __FUNCT__ "MatGetLocalMat" 4495 /*@ 4496 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4497 4498 Not Collective 4499 4500 Input Parameters: 4501 + A - the matrix 4502 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4503 4504 Output Parameter: 4505 . 
A_loc - the local sequential matrix generated 4506 4507 Level: developer 4508 4509 @*/ 4510 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4511 { 4512 PetscErrorCode ierr; 4513 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4514 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4515 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4516 MatScalar *aa=a->a,*ba=b->a,*cam; 4517 PetscScalar *ca; 4518 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4519 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4520 4521 PetscFunctionBegin; 4522 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4523 if (scall == MAT_INITIAL_MATRIX){ 4524 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4525 ci[0] = 0; 4526 for (i=0; i<am; i++){ 4527 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4528 } 4529 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4530 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4531 k = 0; 4532 for (i=0; i<am; i++) { 4533 ncols_o = bi[i+1] - bi[i]; 4534 ncols_d = ai[i+1] - ai[i]; 4535 /* off-diagonal portion of A */ 4536 for (jo=0; jo<ncols_o; jo++) { 4537 col = cmap[*bj]; 4538 if (col >= cstart) break; 4539 cj[k] = col; bj++; 4540 ca[k++] = *ba++; 4541 } 4542 /* diagonal portion of A */ 4543 for (j=0; j<ncols_d; j++) { 4544 cj[k] = cstart + *aj++; 4545 ca[k++] = *aa++; 4546 } 4547 /* off-diagonal portion of A */ 4548 for (j=jo; j<ncols_o; j++) { 4549 cj[k] = cmap[*bj++]; 4550 ca[k++] = *ba++; 4551 } 4552 } 4553 /* put together the new matrix */ 4554 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4555 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4556 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4557 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4558 mat->free_a = PETSC_TRUE; 4559 mat->free_ij = PETSC_TRUE; 4560 mat->nonew = 0; 4561 } else if (scall == MAT_REUSE_MATRIX){ 4562 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4563 ci = mat->i; cj = mat->j; cam = mat->a; 4564 for (i=0; i<am; i++) { 4565 /* off-diagonal portion of A */ 4566 ncols_o = bi[i+1] - bi[i]; 4567 for (jo=0; jo<ncols_o; jo++) { 4568 col = cmap[*bj]; 4569 if (col >= cstart) break; 4570 *cam++ = *ba++; bj++; 4571 } 4572 /* diagonal portion of A */ 4573 ncols_d = ai[i+1] - ai[i]; 4574 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4575 /* off-diagonal portion of A */ 4576 for (j=jo; j<ncols_o; j++) { 4577 *cam++ = *ba++; bj++; 4578 } 4579 } 4580 } else { 4581 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4582 } 4583 4584 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4585 PetscFunctionReturn(0); 4586 } 4587 4588 #undef __FUNCT__ 4589 #define __FUNCT__ "MatGetLocalMatCondensed" 4590 /*@C 4591 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4592 4593 Not Collective 4594 4595 Input Parameters: 4596 + A - the matrix 4597 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4598 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4599 4600 Output Parameter: 4601 . 
A_loc - the local sequential matrix generated 4602 4603 Level: developer 4604 4605 @*/ 4606 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4607 { 4608 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4609 PetscErrorCode ierr; 4610 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4611 IS isrowa,iscola; 4612 Mat *aloc; 4613 4614 PetscFunctionBegin; 4615 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4616 if (!row){ 4617 start = A->rmap->rstart; end = A->rmap->rend; 4618 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4619 } else { 4620 isrowa = *row; 4621 } 4622 if (!col){ 4623 start = A->cmap->rstart; 4624 cmap = a->garray; 4625 nzA = a->A->cmap->n; 4626 nzB = a->B->cmap->n; 4627 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4628 ncols = 0; 4629 for (i=0; i<nzB; i++) { 4630 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4631 else break; 4632 } 4633 imark = i; 4634 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4635 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4636 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4637 ierr = PetscFree(idx);CHKERRQ(ierr); 4638 } else { 4639 iscola = *col; 4640 } 4641 if (scall != MAT_INITIAL_MATRIX){ 4642 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4643 aloc[0] = *A_loc; 4644 } 4645 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4646 *A_loc = aloc[0]; 4647 ierr = PetscFree(aloc);CHKERRQ(ierr); 4648 if (!row){ 4649 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4650 } 4651 if (!col){ 4652 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4653 } 4654 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4655 PetscFunctionReturn(0); 4656 } 4657 4658 #undef __FUNCT__ 4659 #define __FUNCT__ "MatGetBrowsOfAcols" 4660 /*@C 4661 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that correspond to the nonzero columns of local A 4662 4663 Collective on Mat 4664 4665 Input Parameters: 4666 + A,B - the matrices in mpiaij format 4667 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4668 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4669 4670 Output Parameter: 4671 + rowb, colb - index sets of rows and columns of B to extract 4672 . 
brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4673 - B_seq - the sequential matrix generated 4674 4675 Level: developer 4676 4677 @*/ 4678 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4679 { 4680 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4681 PetscErrorCode ierr; 4682 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4683 IS isrowb,iscolb; 4684 Mat *bseq; 4685 4686 PetscFunctionBegin; 4687 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4688 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4689 } 4690 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4691 4692 if (scall == MAT_INITIAL_MATRIX){ 4693 start = A->cmap->rstart; 4694 cmap = a->garray; 4695 nzA = a->A->cmap->n; 4696 nzB = a->B->cmap->n; 4697 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4698 ncols = 0; 4699 for (i=0; i<nzB; i++) { /* row < local row index */ 4700 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4701 else break; 4702 } 4703 imark = i; 4704 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4705 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4706 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4707 ierr = PetscFree(idx);CHKERRQ(ierr); 4708 *brstart = imark; 4709 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4710 } else { 4711 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4712 isrowb = *rowb; iscolb = *colb; 4713 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4714 bseq[0] = *B_seq; 4715 } 4716 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4717 *B_seq = bseq[0]; 4718 ierr = PetscFree(bseq);CHKERRQ(ierr); 4719 if (!rowb){ 4720 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4721 } else { 4722 *rowb = isrowb; 4723 } 4724 if (!colb){ 4725 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4726 } else { 4727 *colb = iscolb; 4728 } 4729 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4730 PetscFunctionReturn(0); 4731 } 4732 4733 #undef __FUNCT__ 4734 #define __FUNCT__ "MatGetBrowsOfAoCols" 4735 /*@C 4736 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that correspond to the nonzero columns 4737 of the OFF-DIAGONAL portion of local A 4738 4739 Collective on Mat 4740 4741 Input Parameters: 4742 + A,B - the matrices in mpiaij format 4743 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4744 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4745 . 
startsj_r - similar to startsj for receives 4746 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4747 4748 Output Parameter: 4749 + B_oth - the sequential matrix generated 4750 4751 Level: developer 4752 4753 @*/ 4754 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4755 { 4756 VecScatter_MPI_General *gen_to,*gen_from; 4757 PetscErrorCode ierr; 4758 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4759 Mat_SeqAIJ *b_oth; 4760 VecScatter ctx=a->Mvctx; 4761 MPI_Comm comm=((PetscObject)ctx)->comm; 4762 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4763 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4764 PetscScalar *rvalues,*svalues; 4765 MatScalar *b_otha,*bufa,*bufA; 4766 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4767 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4768 MPI_Status *sstatus,rstatus; 4769 PetscMPIInt jj; 4770 PetscInt *cols,sbs,rbs; 4771 PetscScalar *vals; 4772 4773 PetscFunctionBegin; 4774 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4775 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4776 } 4777 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4778 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4779 4780 gen_to = (VecScatter_MPI_General*)ctx->todata; 4781 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4782 rvalues = gen_from->values; /* holds the length of receiving row */ 4783 svalues = gen_to->values; /* holds the length of sending row */ 4784 nrecvs = gen_from->n; 4785 nsends = gen_to->n; 4786 4787 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4788 srow = gen_to->indices; /* local row index to be sent */ 4789 sstarts = gen_to->starts; 4790 sprocs = gen_to->procs; 4791 sstatus = gen_to->sstatus; 4792 sbs = gen_to->bs; 4793 rstarts = gen_from->starts; 4794 rprocs = gen_from->procs; 4795 rbs = gen_from->bs; 4796 4797 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4798 if (scall == MAT_INITIAL_MATRIX){ 4799 /* i-array */ 4800 /*---------*/ 4801 /* post receives */ 4802 for (i=0; i<nrecvs; i++){ 4803 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4804 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4805 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4806 } 4807 4808 /* pack the outgoing message */ 4809 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4810 sstartsj[0] = 0; rstartsj[0] = 0; 4811 len = 0; /* total length of j or a array to be sent */ 4812 k = 0; 4813 for (i=0; i<nsends; i++){ 4814 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4815 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4816 for (j=0; j<nrows; j++) { 4817 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4818 for (l=0; l<sbs; l++){ 4819 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4820 rowlen[j*sbs+l] = ncols; 4821 len += ncols; 4822 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4823 } 4824 k++; 4825 } 4826 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4827 sstartsj[i+1] = len; /* starting point of (i+1)-th 
outgoing msg in bufj and bufa */ 4828 } 4829 /* recvs and sends of i-array are completed */ 4830 i = nrecvs; 4831 while (i--) { 4832 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4833 } 4834 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4835 4836 /* allocate buffers for sending j and a arrays */ 4837 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4838 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4839 4840 /* create i-array of B_oth */ 4841 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4842 b_othi[0] = 0; 4843 len = 0; /* total length of j or a array to be received */ 4844 k = 0; 4845 for (i=0; i<nrecvs; i++){ 4846 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4847 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4848 for (j=0; j<nrows; j++) { 4849 b_othi[k+1] = b_othi[k] + rowlen[j]; 4850 len += rowlen[j]; k++; 4851 } 4852 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4853 } 4854 4855 /* allocate space for j and a arrays of B_oth */ 4856 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4857 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4858 4859 /* j-array */ 4860 /*---------*/ 4861 /* post receives of j-array */ 4862 for (i=0; i<nrecvs; i++){ 4863 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4864 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4865 } 4866 4867 /* pack the outgoing message j-array */ 4868 k = 0; 4869 for (i=0; i<nsends; i++){ 4870 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4871 bufJ = bufj+sstartsj[i]; 4872 for (j=0; j<nrows; j++) { 4873 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4874 for (ll=0; ll<sbs; ll++){ 4875 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4876 for (l=0; l<ncols; l++){ 4877 *bufJ++ = cols[l]; 4878 } 4879 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4880 } 4881 } 4882 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4883 } 4884 4885 /* recvs and sends of j-array are completed */ 4886 i = nrecvs; 4887 while (i--) { 4888 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4889 } 4890 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4891 } else if (scall == MAT_REUSE_MATRIX){ 4892 sstartsj = *startsj; 4893 rstartsj = *startsj_r; 4894 bufa = *bufa_ptr; 4895 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4896 b_otha = b_oth->a; 4897 } else { 4898 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 4899 } 4900 4901 /* a-array */ 4902 /*---------*/ 4903 /* post receives of a-array */ 4904 for (i=0; i<nrecvs; i++){ 4905 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4906 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4907 } 4908 4909 /* pack the outgoing message a-array */ 4910 k = 0; 4911 for (i=0; i<nsends; i++){ 4912 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4913 bufA = bufa+sstartsj[i]; 4914 for (j=0; j<nrows; j++) { 4915 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4916 for (ll=0; ll<sbs; ll++){ 4917 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4918 for (l=0; l<ncols; l++){ 4919 *bufA++ = vals[l]; 4920 } 4921 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4922 } 4923 } 4924 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4925 } 4926 /* recvs and sends of a-array are completed */ 4927 i = nrecvs; 4928 while (i--) { 4929 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4930 } 4931 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4932 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4933 4934 if (scall == MAT_INITIAL_MATRIX){ 4935 /* put together the new matrix */ 4936 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4937 4938 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4939 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4940 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4941 b_oth->free_a = PETSC_TRUE; 4942 b_oth->free_ij = PETSC_TRUE; 4943 b_oth->nonew = 0; 4944 4945 ierr = PetscFree(bufj);CHKERRQ(ierr); 4946 if (!startsj || !bufa_ptr){ 4947 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4948 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4949 } else { 4950 *startsj = sstartsj; 4951 *startsj_r = rstartsj; 4952 *bufa_ptr = bufa; 4953 } 4954 } 4955 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4956 PetscFunctionReturn(0); 4957 } 4958 4959 #undef __FUNCT__ 4960 #define __FUNCT__ "MatGetCommunicationStructs" 4961 /*@C 4962 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4963 4964 Not Collective 4965 4966 Input Parameters: 4967 . A - The matrix in mpiaij format 4968 4969 Output Parameter: 4970 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4971 . 
colmap - A map from global column index to local index into lvec 4972 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4973 4974 Level: developer 4975 4976 @*/ 4977 #if defined (PETSC_USE_CTABLE) 4978 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4979 #else 4980 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4981 #endif 4982 { 4983 Mat_MPIAIJ *a; 4984 4985 PetscFunctionBegin; 4986 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4987 PetscValidPointer(lvec, 2); 4988 PetscValidPointer(colmap, 3); 4989 PetscValidPointer(multScatter, 4); 4990 a = (Mat_MPIAIJ *) A->data; 4991 if (lvec) *lvec = a->lvec; 4992 if (colmap) *colmap = a->colmap; 4993 if (multScatter) *multScatter = a->Mvctx; 4994 PetscFunctionReturn(0); 4995 } 4996 4997 EXTERN_C_BEGIN 4998 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 4999 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5000 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5001 EXTERN_C_END 5002 5003 #undef __FUNCT__ 5004 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5005 /* 5006 Computes (B'*A')' since computing B*A directly is untenable 5007 5008 n p p 5009 ( ) ( ) ( ) 5010 m ( A ) * n ( B ) = m ( C ) 5011 ( ) ( ) ( ) 5012 5013 */ 5014 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5015 { 5016 PetscErrorCode ierr; 5017 Mat At,Bt,Ct; 5018 5019 PetscFunctionBegin; 5020 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5021 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5022 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5023 ierr = MatDestroy(At);CHKERRQ(ierr); 5024 ierr = MatDestroy(Bt);CHKERRQ(ierr); 5025 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5026 ierr = MatDestroy(Ct);CHKERRQ(ierr); 5027 PetscFunctionReturn(0); 5028 } 5029 5030 #undef __FUNCT__ 5031 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5032 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5033 { 5034 PetscErrorCode ierr; 5035 PetscInt m=A->rmap->n,n=B->cmap->n; 5036 Mat Cmat; 5037 5038 PetscFunctionBegin; 5039 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5040 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5041 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5042 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5043 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5044 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5045 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5046 *C = Cmat; 5047 PetscFunctionReturn(0); 5048 } 5049 5050 /* ----------------------------------------------------------------*/ 5051 #undef __FUNCT__ 5052 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5053 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5054 { 5055 PetscErrorCode ierr; 5056 5057 PetscFunctionBegin; 5058 if (scall == MAT_INITIAL_MATRIX){ 5059 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5060 } 5061 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5062 PetscFunctionReturn(0); 5063 } 
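/*
   Illustrative usage sketch (not part of the original source): assuming a previously assembled
   MATMPIDENSE matrix A and a MATMPIAIJ matrix B with A->cmap->n == B->rmap->n, the dense*sparse
   product implemented above is normally reached through the generic interface:

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);

   For this matrix-type pair MatMatMult() dispatches to MatMatMult_MPIDense_MPIAIJ() above, which
   runs the symbolic routine once (for MAT_INITIAL_MATRIX) and then the numeric routine that forms
   (B'*A')' to obtain C = A*B.
*/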
5064 5065 EXTERN_C_BEGIN 5066 #if defined(PETSC_HAVE_MUMPS) 5067 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5068 #endif 5069 #if defined(PETSC_HAVE_PASTIX) 5070 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5071 #endif 5072 #if defined(PETSC_HAVE_SUPERLU_DIST) 5073 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5074 #endif 5075 #if defined(PETSC_HAVE_SPOOLES) 5076 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5077 #endif 5078 EXTERN_C_END 5079 5080 /*MC 5081 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5082 5083 Options Database Keys: 5084 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5085 5086 Level: beginner 5087 5088 .seealso: MatCreateMPIAIJ() 5089 M*/ 5090 5091 EXTERN_C_BEGIN 5092 #undef __FUNCT__ 5093 #define __FUNCT__ "MatCreate_MPIAIJ" 5094 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5095 { 5096 Mat_MPIAIJ *b; 5097 PetscErrorCode ierr; 5098 PetscMPIInt size; 5099 5100 PetscFunctionBegin; 5101 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5102 5103 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5104 B->data = (void*)b; 5105 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5106 B->rmap->bs = 1; 5107 B->assembled = PETSC_FALSE; 5108 B->mapping = 0; 5109 5110 B->insertmode = NOT_SET_VALUES; 5111 b->size = size; 5112 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5113 5114 /* build cache for off array entries formed */ 5115 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5116 b->donotstash = PETSC_FALSE; 5117 b->colmap = 0; 5118 b->garray = 0; 5119 b->roworiented = PETSC_TRUE; 5120 5121 /* stuff used for matrix vector multiply */ 5122 b->lvec = PETSC_NULL; 5123 b->Mvctx = PETSC_NULL; 5124 5125 /* stuff for MatGetRow() */ 5126 b->rowindices = 0; 5127 b->rowvalues = 0; 5128 b->getrowactive = PETSC_FALSE; 5129 5130 #if defined(PETSC_HAVE_SPOOLES) 5131 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5132 "MatGetFactor_mpiaij_spooles", 5133 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5134 #endif 5135 #if defined(PETSC_HAVE_MUMPS) 5136 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5137 "MatGetFactor_aij_mumps", 5138 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5139 #endif 5140 #if defined(PETSC_HAVE_PASTIX) 5141 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5142 "MatGetFactor_mpiaij_pastix", 5143 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5144 #endif 5145 #if defined(PETSC_HAVE_SUPERLU_DIST) 5146 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5147 "MatGetFactor_mpiaij_superlu_dist", 5148 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5149 #endif 5150 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5151 "MatStoreValues_MPIAIJ", 5152 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5153 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5154 "MatRetrieveValues_MPIAIJ", 5155 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5156 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5157 "MatGetDiagonalBlock_MPIAIJ", 5158 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5159 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5160 "MatIsTranspose_MPIAIJ", 
5161 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5162 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5163 "MatMPIAIJSetPreallocation_MPIAIJ", 5164 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5165 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5166 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5167 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5168 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5169 "MatDiagonalScaleLocal_MPIAIJ", 5170 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5171 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5172 "MatConvert_MPIAIJ_MPIAIJPERM", 5173 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5174 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5175 "MatConvert_MPIAIJ_MPIAIJCRL", 5176 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5177 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5178 "MatConvert_MPIAIJ_MPISBAIJ", 5179 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5180 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5181 "MatMatMult_MPIDense_MPIAIJ", 5182 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5183 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5184 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5185 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5186 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5187 "MatMatMultNumeric_MPIDense_MPIAIJ", 5188 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5189 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5190 PetscFunctionReturn(0); 5191 } 5192 EXTERN_C_END 5193 5194 #undef __FUNCT__ 5195 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5196 /*@ 5197 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5198 and "off-diagonal" part of the matrix in CSR format. 5199 5200 Collective on MPI_Comm 5201 5202 Input Parameters: 5203 + comm - MPI communicator 5204 . m - number of local rows (Cannot be PETSC_DECIDE) 5205 . n - This value should be the same as the local size used in creating the 5206 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5207 calculated if N is given) For square matrices n is almost always m. 5208 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5209 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5210 . i - row indices for "diagonal" portion of matrix 5211 . j - column indices 5212 . a - matrix values 5213 . oi - row indices for "off-diagonal" portion of matrix 5214 . oj - column indices 5215 - oa - matrix values 5216 5217 Output Parameter: 5218 . mat - the matrix 5219 5220 Level: advanced 5221 5222 Notes: 5223 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5224 5225 The i and j indices are 0 based 5226 5227 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5228 5229 This sets local rows and cannot be used to set off-processor values. 5230 5231 You cannot later use MatSetValues() to change values in this matrix. 
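   Example usage (an illustrative sketch, not taken from this source; it assumes a run on a single
   process, so the off-diagonal block is empty and oj/oa are unused placeholders; as noted above,
   the arrays are not copied and must remain valid for the life of the matrix):
.vb
      PetscInt    i[]  = {0,1,2}, j[]  = {0,1};
      PetscScalar a[]  = {1.0,2.0};
      PetscInt    oi[] = {0,0,0}, oj[] = {0};
      PetscScalar oa[] = {0.0};
      Mat         A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve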
5232 5233 .keywords: matrix, aij, compressed row, sparse, parallel 5234 5235 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5236 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5237 @*/ 5238 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5239 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5240 { 5241 PetscErrorCode ierr; 5242 Mat_MPIAIJ *maij; 5243 5244 PetscFunctionBegin; 5245 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5246 if (i[0]) { 5247 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5248 } 5249 if (oi[0]) { 5250 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5251 } 5252 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5253 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5254 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5255 maij = (Mat_MPIAIJ*) (*mat)->data; 5256 maij->donotstash = PETSC_TRUE; 5257 (*mat)->preallocated = PETSC_TRUE; 5258 5259 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5260 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5261 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5262 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5263 5264 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5265 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5266 5267 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5268 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5269 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5270 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5271 5272 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5273 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5274 PetscFunctionReturn(0); 5275 } 5276 5277 /* 5278 Special version for direct calls from Fortran 5279 */ 5280 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5281 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5282 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5283 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5284 #endif 5285 5286 /* Change these macros so can be used in void function */ 5287 #undef CHKERRQ 5288 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5289 #undef SETERRQ2 5290 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5291 #undef SETERRQ 5292 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5293 5294 EXTERN_C_BEGIN 5295 #undef __FUNCT__ 5296 #define __FUNCT__ "matsetvaluesmpiaij_" 5297 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5298 { 5299 Mat mat = *mmat; 5300 PetscInt m = *mm, n = *mn; 5301 InsertMode addv = *maddv; 5302 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5303 PetscScalar value; 5304 PetscErrorCode ierr; 5305 5306 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5307 if (mat->insertmode == NOT_SET_VALUES) { 5308 mat->insertmode = addv; 5309 } 5310 #if defined(PETSC_USE_DEBUG) 5311 else if (mat->insertmode != addv) { 5312 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5313 } 5314 #endif 5315 { 5316 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5317 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5318 PetscBool roworiented = aij->roworiented; 5319 5320 /* Some Variables required in the macro */ 5321 Mat A = aij->A; 5322 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5323 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5324 MatScalar *aa = a->a; 5325 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5326 Mat B = aij->B; 5327 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5328 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5329 MatScalar *ba = b->a; 5330 5331 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5332 PetscInt nonew = a->nonew; 5333 MatScalar *ap1,*ap2; 5334 5335 PetscFunctionBegin; 5336 for (i=0; i<m; i++) { 5337 if (im[i] < 0) continue; 5338 #if defined(PETSC_USE_DEBUG) 5339 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5340 #endif 5341 if (im[i] >= rstart && im[i] < rend) { 5342 row = im[i] - rstart; 5343 lastcol1 = -1; 5344 rp1 = aj + ai[row]; 5345 ap1 = aa + ai[row]; 5346 rmax1 = aimax[row]; 5347 nrow1 = ailen[row]; 5348 low1 = 0; 5349 high1 = nrow1; 5350 lastcol2 = -1; 5351 rp2 = bj + bi[row]; 5352 ap2 = ba + bi[row]; 5353 rmax2 = bimax[row]; 5354 nrow2 = bilen[row]; 5355 low2 = 0; 5356 high2 = nrow2; 5357 5358 for (j=0; j<n; j++) { 5359 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5360 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5361 if (in[j] >= cstart && in[j] < cend){ 5362 col = in[j] - cstart; 5363 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5364 } else if (in[j] < 0) continue; 5365 #if defined(PETSC_USE_DEBUG) 5366 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5367 #endif 5368 else { 5369 if (mat->was_assembled) { 5370 if (!aij->colmap) { 5371 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5372 } 5373 #if defined (PETSC_USE_CTABLE) 5374 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5375 col--; 5376 #else 5377 col = aij->colmap[in[j]] - 1; 5378 #endif 5379 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5380 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5381 col = in[j]; 5382 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5383 B = aij->B; 5384 b = (Mat_SeqAIJ*)B->data; 5385 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5386 rp2 = bj + bi[row]; 5387 ap2 = ba + bi[row]; 5388 rmax2 = bimax[row]; 5389 nrow2 = bilen[row]; 5390 low2 = 0; 5391 high2 = nrow2; 5392 bm = aij->B->rmap->n; 5393 ba = b->a; 5394 } 5395 } else col = in[j]; 5396 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5397 } 5398 } 5399 } else { 5400 if (!aij->donotstash) { 5401 if (roworiented) { 5402 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool )(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5403 } else { 5404 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool )(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5405 } 5406 } 5407 } 5408 }} 5409 PetscFunctionReturnVoid(); 5410 } 5411 EXTERN_C_END 5412 5413
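/*
   Illustrative sketch (not part of the original source): matsetvaluesmpiaij_() above mirrors the
   generic MatSetValues() insertion path for a MATMPIAIJ matrix. From C, the equivalent operation
   on a preallocated MATMPIAIJ matrix "mat" that owns rows [rstart,rend) would look like:

      PetscInt    row = rstart, cols[2] = {0,1};
      PetscScalar vals[2] = {1.0,2.0};
      ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Entries destined for rows owned by another process are stashed (unless donotstash is set) and
   are moved to the owning process during MatAssemblyBegin()/MatAssemblyEnd(), exactly as in the
   MatStashValuesRow_Private()/MatStashValuesCol_Private() calls above.
*/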