#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal nonzeros per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal nonzeros per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {  /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
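/*
   A hedged usage sketch for the routine above (illustrative only; this is an
   internal routine, and the names gseq, mlocal, and Adist are hypothetical).
   It assumes a square SeqAIJ matrix assembled on process 0:

      Mat      gseq,Adist;
      PetscInt mlocal = ...;      number of rows this process should own

      ... assemble gseq on process 0; it is only referenced there ...
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);
      ... with the same nonzero pattern, later value updates can reuse Adist ...
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&Adist);CHKERRQ(ierr);
*/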
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
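/*
   Sketch of how the column map built by CreateColmap_MPIAIJ_Private() is
   consulted (this mirrors the lookups in MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below; gcol and lcol are hypothetical names). A
   global column index is translated to a local index into the off-diagonal
   block B; the +1/-1 shift lets 0 stand for "column not present":

      PetscInt gcol = ...,lcol;
   #if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;                           after this, lcol < 0 means absent
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif
*/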
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
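/*
   The stashing path in MatSetValues_MPIAIJ() above means callers may set
   entries for rows they do not own; nothing is communicated until assembly.
   A hedged sketch (row, col, and val are hypothetical):

      ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
           an off-process row lands in mat->stash
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
           the stash is scattered to the owning processes
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
           owners insert the received values via MatSetValues_MPIAIJ()
*/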
extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr); aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}
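/*
   The mode check in MatAssemblyBegin_MPIAIJ() above works because
   INSERT_VALUES and ADD_VALUES have distinct bit patterns, so a bitwise OR
   across all processes reveals mixing. A minimal sketch of the idea:

      InsertMode mine = mat->insertmode,all;
      ierr = MPI_Allreduce(&mine,&all,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
      if (all == (ADD_VALUES|INSERT_VALUES)) {
        ... some processes inserted while others added: raise an error ...
      }
*/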
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscBool      found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* gather the received row indices into a single local list */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the diagonal block of the matrix is square and
     the user wishes to set the diagonal, we use separate code so that
     MatSetValues() is not called for each diagonal entry; that would allocate
     new memory each time, triggering many mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
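/*
   The multiply routines above share one overlap pattern: start the scatter of
   ghost values, do the local (diagonal block) product while messages are in
   flight, finish the scatter, then fold in the off-diagonal block:

      ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);              local work hides communication
      ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);   yy += B*(ghost values)
*/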
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: this assumes the values are not actually added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);  /* ISCreateGeneral() copied the indices, so this array must be freed */

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
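/*
   Hedged usage sketch for the transpose test implemented in
   MatIsTranspose_MPIAIJ() above (A and B are hypothetical assembled MPIAIJ
   matrices; it is normally reached through the public MatIsTranspose()):

      PetscBool f;
      ierr = MatIsTranspose(A,B,1.e-10,&f);CHKERRQ(ierr);
      if (f) { ... B equals the transpose of A to within the tolerance ... }
*/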
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  if (aij->A)    {ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
  if (aij->B)    {ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)  {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs enough space for the largest number of nonzeros on any process */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);
  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
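/*
   For reference, the layout of the binary file produced above (integers are
   written as PETSC_INT, values as PETSC_SCALAR):

      MAT_FILE_CLASSID
      M               global number of rows
      N               global number of columns
      nz              global number of nonzeros
      row lengths     M entries, nonzeros per row
      column indices  nz entries, row by row, in increasing order within a row
      values          nz entries, in the same order as the column indices
*/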
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject)mat)->prefix,"-mat_ascii_output_large",&flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
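/*
   The local sweeps in MatSOR_MPIAIJ() above split the matrix as A_loc + B,
   where A_loc is this process's diagonal block and B couples to ghost values.
   Each outer iteration forms

      bb1 = bb - B*x_ghost

   (the VecScale(mat->lvec,-1.0) followed by B->ops->multadd above) and then
   runs the requested forward/backward/symmetric sweep of the diagonal block
   on bb1, so only the ghost updates of xx are communicated per iteration.
*/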
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}
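/*
   Hedged usage sketch for the reductions implemented in MatGetInfo_MPIAIJ()
   above (mat is a hypothetical assembled MPIAIJ matrix):

      MatInfo info;
      ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",
                         info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/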
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

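/*
   Editorial sketch (not part of the PETSc sources): typical options a user
   sets before assembly.  Most are forwarded to the sequential A and B parts
   by MatSetOption_MPIAIJ() above; MAT_IGNORE_OFF_PROC_ENTRIES only flips the
   donotstash flag on the parallel level.
*/
static PetscErrorCode SketchSetCommonOptions(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* every process generates entries only in its own rows, so skip the stash */
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  /* keep the nonzero pattern when MatZeroRows() is used later */
  ierr = MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
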
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

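/*
   Editorial sketch (not part of the PETSc sources): the intended calling
   pattern for the two routines above.  MatGetRow() may only be asked for
   locally owned rows, one row at a time, and each call must be paired with
   MatRestoreRow() before the next row is requested.
*/
static PetscErrorCode SketchWalkLocalRows(Mat A)
{
  PetscInt          row,rstart,rend,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ... inspect the ncols global column indices and values here ... */
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
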
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

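/*
   Editorial sketch (not part of the PETSc sources): the three norms the
   routine above supports.  NORM_2 is rejected because the spectral norm of a
   distributed sparse matrix cannot be computed by a local reduction.
*/
static PetscErrorCode SketchCompareNorms(Mat A)
{
  PetscReal      fro,one,inf;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_FROBENIUS,&fro);CHKERRQ(ierr); /* sqrt of sum of squares */
  ierr = MatNorm(A,NORM_1,&one);CHKERRQ(ierr);         /* max column sum         */
  ierr = MatNorm(A,NORM_INFINITY,&inf);CHKERRQ(ierr);  /* max row sum            */
  ierr = PetscPrintf(((PetscObject)A)->comm,"Frobenius %G, 1-norm %G, inf-norm %G\n",fro,one,inf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
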
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
  PetscInt       cstart=A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++){
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col index */

  /* copy over the B part */
  ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

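/*
   Editorial sketch (not part of the PETSc sources): symmetric diagonal
   scaling using MatDiagonalScale() as implemented above; the scatter of the
   right vector overlaps with scaling the diagonal block.  The vector d is
   assumed by this sketch to already hold the desired scaling factors, e.g.
   inverse square roots of the diagonal.
*/
static PetscErrorCode SketchSymmetricScale(Mat A,Vec d)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDiagonalScale(A,d,d);CHKERRQ(ierr); /* left and right scale by the same vector */
  PetscFunctionReturn(0);
}
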
#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we could provide a more
       efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
       and then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

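/*
   Editorial sketch (not part of the PETSc sources): the fast path in
   MatCopy_MPIAIJ() above is taken only when both matrices share the same
   copy implementation and SAME_NONZERO_PATTERN is passed; otherwise the
   general (and slower) MatCopy_Basic() is used.
*/
static PetscErrorCode SketchCopyWithSamePattern(Mat A,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* duplicating first guarantees an identical nonzero pattern */
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,B);CHKERRQ(ierr);
  ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
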
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x   = (Mat_SeqAIJ*)xx->A->data;
    y   = (Mat_SeqAIJ*)yy->A->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x   = (Mat_SeqAIJ*)xx->B->data;
    y   = (Mat_SeqAIJ*)yy->B->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ*)xx->B->data;
    y = (Mat_SeqAIJ*)yy->B->data;
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

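/*
   Editorial sketch (not part of the PETSc sources): the three cases of
   MatAXPY_MPIAIJ() above.  SAME_NONZERO_PATTERN reduces to one BLAS axpy per
   block; SUBSET_NONZERO_PATTERN builds (and caches) an x-to-y index map;
   DIFFERENT_NONZERO_PATTERN must allocate a fresh matrix for the sum.
*/
static PetscErrorCode SketchShiftedSum(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Y = Y - 0.5*X; pass the strongest structure flag that actually holds */
  ierr = MatAXPY(Y,-0.5,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
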
#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity,col_identity;
  PetscContainer c;
  PetscInt       m,n,M,N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type  *lgraph_p = new lgraph_type(petsc::num_global_vertices(A),pg,petsc::matrix_distribution(A,pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A,graph,get(boost::edge_weight,graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(fact,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(((PetscObject)A)->comm,&c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c,lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)(fact),"graph",(PetscObject)c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatSolve_MPIAIJ(Mat A,Vec b,Vec x)
{
  namespace graph_dist = boost::graph::distributed;

  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type    *lgraph_p;
  PetscContainer c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"graph",(PetscObject*)&c);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(c,(void**)&lgraph_p);CHKERRQ(ierr);
  ierr = VecCopy(b,x);CHKERRQ(ierr);

  PetscScalar *array_x;
  ierr = VecGetArray(x,&array_x);CHKERRQ(ierr);
  PetscInt sx;
  ierr = VecGetSize(x,&sx);CHKERRQ(ierr);

  PetscScalar *array_b;
  ierr = VecGetArray(b,&array_b);CHKERRQ(ierr);
  PetscInt sb;
  ierr = VecGetSize(b,&sb);CHKERRQ(ierr);

  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  typedef boost::multi_array_ref<PetscScalar,1> array_ref_type;
  array_ref_type ref_b(array_b,boost::extents[num_vertices(graph)]),
                 ref_x(array_x,boost::extents[num_vertices(graph)]);

  typedef boost::iterator_property_map<array_ref_type::iterator,
            boost::property_map<graph_dist::ilu_default::graph_type,boost::vertex_index_t>::type> gvector_type;
  gvector_type vector_b(ref_b.begin(),get(boost::vertex_index,graph)),
               vector_x(ref_x.begin(),get(boost::vertex_index,graph));

  ilu_set_solve(*lgraph_p,vector_b,vector_x);

  PetscFunctionReturn(0);
}
#endif

typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;
  PetscMPIInt    *send_rank,*recv_rank;
  PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
  PetscScalar    *sbuf_a,**rbuf_a;
  PetscErrorCode (*MatDestroy)(Mat);
} Mat_Redundant;

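/*
   Editorial sketch (not part of the PETSc sources): the composed-container
   idiom used with Mat_Redundant below.  An arbitrary C struct is attached to
   a PETSc object under a string key and recovered later with
   PetscObjectQuery(); the names used here (MyCtx, "MyCtx") are illustrative
   only.
*/
typedef struct { PetscInt cached; } MyCtx;

static PetscErrorCode SketchAttachContext(Mat C,MyCtx *ctx)
{
  PetscContainer container;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,ctx);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)C,"MyCtx",(PetscObject)container);CHKERRQ(ierr);
  /* the composed object now holds its own reference to the container */
  ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
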
#undef __FUNCT__
#define __FUNCT__ "PetscContainerDestroy_MatRedundant"
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund=(Mat_Redundant*)ptr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
  for (i=0; i<redund->nrecvs; i++){
    ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
    ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
  ierr = PetscFree(redund);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatRedundant"
PetscErrorCode MatDestroy_MatRedundant(Mat A)
{
  PetscErrorCode ierr;
  PetscContainer container;
  Mat_Redundant  *redund=PETSC_NULL;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
  }
  A->ops->destroy = redund->MatDestroy;
  ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
Wrong nzlocal"); 2145 2146 nsends = redund->nsends; 2147 nrecvs = redund->nrecvs; 2148 send_rank = redund->send_rank; 2149 recv_rank = redund->recv_rank; 2150 sbuf_nz = redund->sbuf_nz; 2151 rbuf_nz = redund->rbuf_nz; 2152 sbuf_j = redund->sbuf_j; 2153 sbuf_a = redund->sbuf_a; 2154 rbuf_j = redund->rbuf_j; 2155 rbuf_a = redund->rbuf_a; 2156 } 2157 2158 if (reuse == MAT_INITIAL_MATRIX){ 2159 PetscMPIInt subrank,subsize; 2160 PetscInt nleftover,np_subcomm; 2161 /* get the destination processors' id send_rank, nsends and nrecvs */ 2162 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2163 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2164 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2165 np_subcomm = size/nsubcomm; 2166 nleftover = size - nsubcomm*np_subcomm; 2167 nsends = 0; nrecvs = 0; 2168 for (i=0; i<size; i++){ /* i=rank*/ 2169 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2170 send_rank[nsends] = i; nsends++; 2171 recv_rank[nrecvs++] = i; 2172 } 2173 } 2174 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2175 i = size-nleftover-1; 2176 j = 0; 2177 while (j < nsubcomm - nleftover){ 2178 send_rank[nsends++] = i; 2179 i--; j++; 2180 } 2181 } 2182 2183 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2184 for (i=0; i<nleftover; i++){ 2185 recv_rank[nrecvs++] = size-nleftover+i; 2186 } 2187 } 2188 2189 /* allocate sbuf_j, sbuf_a */ 2190 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2191 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2192 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2193 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2194 2195 /* copy mat's local entries into the buffers */ 2196 if (reuse == MAT_INITIAL_MATRIX){ 2197 rownz_max = 0; 2198 rptr = sbuf_j; 2199 cols = sbuf_j + rend-rstart + 1; 2200 vals = sbuf_a; 2201 rptr[0] = 0; 2202 for (i=0; i<rend-rstart; i++){ 2203 row = i + rstart; 2204 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2205 ncols = nzA + nzB; 2206 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2207 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2208 /* load the column indices for this row into cols */ 2209 lwrite = 0; 2210 for (l=0; l<nzB; l++) { 2211 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2212 vals[lwrite] = aworkB[l]; 2213 cols[lwrite++] = ctmp; 2214 } 2215 } 2216 for (l=0; l<nzA; l++){ 2217 vals[lwrite] = aworkA[l]; 2218 cols[lwrite++] = cstart + cworkA[l]; 2219 } 2220 for (l=0; l<nzB; l++) { 2221 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2222 vals[lwrite] = aworkB[l]; 2223 cols[lwrite++] = ctmp; 2224 } 2225 } 2226 vals += ncols; 2227 cols += ncols; 2228 rptr[i+1] = rptr[i] + ncols; 2229 if (rownz_max < ncols) rownz_max = ncols; 2230 } 2231 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2232 } else { /* only copy matrix values into sbuf_a */ 2233 rptr = sbuf_j; 2234 vals = sbuf_a; 2235 rptr[0] = 0; 2236 for (i=0; i<rend-rstart; i++){ 2237 row = i + rstart; 2238 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2239 ncols = nzA + nzB; 2240 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2241 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2242 lwrite = 0; 2243 for (l=0; l<nzB; l++) { 2244 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2245 } 2246 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2247 for 
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
      }
      vals += ncols;
      rptr[i+1] = rptr[i] + ncols;
    }
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */

  /* send nzlocal to others, and recv other's nzlocal */
  /*--------------------------------------------------*/
  if (reuse == MAT_INITIAL_MATRIX){
    ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    s_waits2 = s_waits3 + nsends;
    s_waits1 = s_waits2 + nsends;
    r_waits1 = s_waits1 + nsends;
    r_waits2 = r_waits1 + nrecvs;
    r_waits3 = r_waits2 + nrecvs;
  } else {
    ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    r_waits3 = s_waits3 + nsends;
  }

  ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX){
    /* get new tags to keep the communication clean */
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
    ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);

    /* post receives of other's nzlocal */
    for (i=0; i<nrecvs; i++){
      ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
    }
    /* send nzlocal to others */
    for (i=0; i<nsends; i++){
      sbuf_nz[i] = nzlocal;
      ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
    }
    /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
      recv_rank[imdex] = recv_status.MPI_SOURCE;
      /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
      ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);

      i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
      rbuf_nz[imdex] += i + 2;
      ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
      ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
      count--;
    }
    /* wait on sends of nzlocal */
    if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
    /* send mat->i,j to others, and recv from other's */
    /*------------------------------------------------*/
    for (i=0; i<nsends; i++){
      j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
      ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
    }
    /* wait on receives of mat->i,j */
    /*------------------------------*/
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
      if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
      count--;
    }
    /* wait on sends of mat->i,j */
    /*---------------------------*/
    if (nsends) {
      ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
    }
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */

  /* post receives, send and receive mat->a */
  /*----------------------------------------*/
  for (imdex=0; imdex<nrecvs; imdex++) {
    ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
  }
  for (i=0; i<nsends; i++){
    ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
  }
  count = nrecvs;
  while (count) {
    ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
    if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
    count--;
  }
  if (nsends) {
    ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
  }

  ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);

  /* create redundant matrix */
  /*-------------------------*/
  if (reuse == MAT_INITIAL_MATRIX){
    /* compute rownz_max for preallocation */
    for (imdex=0; imdex<nrecvs; imdex++){
      j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
      rptr = rbuf_j[imdex];
      for (i=0; i<j; i++){
        ncols = rptr[i+1] - rptr[i];
        if (rownz_max < ncols) rownz_max = ncols;
      }
    }

    ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
    ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetFromOptions(C);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
  } else {
    C = *matredundant;
  }

  /* insert local matrix entries */
  rptr = sbuf_j;
  cols = sbuf_j + rend-rstart + 1;
  vals = sbuf_a;
  for (i=0; i<rend-rstart; i++){
    row   = i + rstart;
    ncols = rptr[i+1] - rptr[i];
    ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    vals += ncols;
    cols += ncols;
  }
  /* insert received matrix entries */
  for (imdex=0; imdex<nrecvs; imdex++){
    rstart = rowrange[recv_rank[imdex]];
    rend   = rowrange[recv_rank[imdex]+1];
    rptr   = rbuf_j[imdex];
    cols   = rbuf_j[imdex] + rend-rstart + 1;
    vals   = rbuf_a[imdex];
    for (i=0; i<rend-rstart; i++){
      row   = i + rstart;
      ncols = rptr[i+1] - rptr[i];
      ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      vals += ncols;
      cols += ncols;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
  if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
  if (reuse == MAT_INITIAL_MATRIX){
    PetscContainer container;
    *matredundant = C;
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
    ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
    ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);

    redund->nzlocal   = nzlocal;
    redund->nsends    = nsends;
    redund->nrecvs    = nrecvs;
    redund->send_rank = send_rank;
    redund->recv_rank = recv_rank;
    redund->sbuf_nz   = sbuf_nz;
    redund->rbuf_nz   = rbuf_nz;
    redund->sbuf_j    = sbuf_j;
    redund->sbuf_a    = sbuf_a;
    redund->rbuf_j    = rbuf_j;
    redund->rbuf_a    = rbuf_a;

    redund->MatDestroy = C->ops->destroy;
    C->ops->destroy    = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {
    ierr = PetscFree(idxb);CHKERRQ(ierr);
  }
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {
    ierr = PetscFree(idxb);CHKERRQ(ierr);
  }
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

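/*
   Editorial sketch (not part of the PETSc sources): typical use of the
   row-max routine above, e.g. to build a Jacobi-like row scaling.  The
   vector receives the |a_ij| maxima of the locally owned rows; the index
   array may be PETSC_NULL when the column positions are not needed.
*/
static PetscErrorCode SketchRowMaxScaling(Mat A,Vec *scale)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,PETSC_NULL,scale);CHKERRQ(ierr); /* vector in the row space */
  ierr = MatGetRowMaxAbs(A,*scale,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecReciprocal(*scale);CHKERRQ(ierr); /* caller must ensure there are no zero rows */
  PetscFunctionReturn(0);
}
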
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx,*offdiagIdx;
  Vec            diagV,offdiagV;
  PetscScalar    *a,*diagA,*offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A,diagV,diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B,offdiagV,offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV,&diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV,&offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v,&a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV,&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV,&offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(diagV);CHKERRQ(ierr);
  ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx,offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx,*offdiagIdx;
  Vec            diagV,offdiagV;
  PetscScalar    *a,*diagA,*offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A,diagV,diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B,offdiagV,offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV,&diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV,&offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v,&a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV,&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV,&offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(diagV);CHKERRQ(ierr);
  ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx,offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*24*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
       0,
/*34*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*44*/ MatGetRowMax_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*49*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*54*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*59*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPIAIJ,
       MatGetRowMinAbs_MPIAIJ,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ MatFDColoringApply_AIJ,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
/*83*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*89*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*94*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*99*/ 0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*104*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*109*/0,
       MatGetRedundantMatrix_MPIAIJ,
       MatGetRowMin_MPIAIJ,
       0,
       0,
/*114*/MatGetSeqNonzerostructure_MPIAIJ,
       0,
       0,
       0,
       0,
/*119*/0,
       0,
       0,
       0,
       MatGetMultiProcBlock_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

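/*
   Editorial sketch (not part of the PETSc sources): the intended use of the
   two routines above.  MatStoreValues() snapshots the numerical values of a
   matrix whose nonzero pattern is frozen; MatRetrieveValues() restores them,
   which is useful when the same pattern is refilled repeatedly.
*/
static PetscErrorCode SketchSnapshotValues(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* storing values requires that the nonzero pattern be frozen first */
  ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatStoreValues(A);CHKERRQ(ierr);
  /* ... modify the values of A, e.g. add a diagonal shift, and use it ... */
  ierr = MatRetrieveValues(A);CHKERRQ(ierr); /* back to the stored values */
  PetscFunctionReturn(0);
}
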
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

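/*
   Editorial sketch (not part of the PETSc sources): preallocating an MPIAIJ
   matrix with the routine above.  d_nz/o_nz (or the per-row d_nnz/o_nnz
   arrays) count the entries falling in the diagonal block, i.e. columns
   owned by this process, and in the off-diagonal block; the tridiagonal
   pattern here is just an illustration.
*/
static PetscErrorCode SketchPreallocateTridiagonal(MPI_Comm comm,PetscInt mlocal,PetscInt Nglobal,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mlocal,mlocal,Nglobal,Nglobal);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  /* at most 3 entries per row in the diagonal block and 2 couplings to
     neighboring processes; constant counts, so the nnz arrays are not needed */
  ierr = MatMPIAIJSetPreallocation(*A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
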
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->rmap->bs     = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/*
   Allows sending/receiving messages larger than 2 gigabytes in a single call
*/
static int MPILong_Send(void *mess,PetscInt cnt,MPI_Datatype type,int to,int tag,MPI_Comm comm)
{
  int             ierr;
  static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
  PetscInt        i,numchunks;
  PetscMPIInt     icnt;

  numchunks = cnt/CHUNKSIZE + 1;
  for (i=0; i<numchunks; i++) {
    icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
    ierr = MPI_Send(mess,icnt,type,to,tag,comm);
    if (ierr) return ierr;
    if (type == MPIU_INT) {
      mess = (void*)(((PetscInt*)mess) + CHUNKSIZE);
    } else if (type == MPIU_SCALAR) {
      mess = (void*)(((PetscScalar*)mess) + CHUNKSIZE);
    } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
  }
  return 0;
}

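/*
   Editorial note (not part of the PETSc sources), a worked example of the
   chunking above: with CHUNKSIZE = 250,000,000 and cnt = 600,000,000,
   numchunks = 600000000/250000000 + 1 = 3 and the chunk lengths are
   250000000, 250000000 and 600000000 - 2*250000000 = 100000000, each of
   which fits in the int count argument of MPI_Send()/MPI_Recv().  When cnt
   is an exact multiple of CHUNKSIZE the last chunk has length 0; sender and
   receiver compute the same schedule, so the zero-length message still
   matches.
*/
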
static int MPILong_Recv(void *mess,PetscInt cnt,MPI_Datatype type,int from,int tag,MPI_Comm comm)
{
  int             ierr;
  static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
  MPI_Status      status;
  PetscInt        i,numchunks;
  PetscMPIInt     icnt;

  numchunks = cnt/CHUNKSIZE + 1;
  for (i=0; i<numchunks; i++) {
    icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
    ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);
    if (ierr) return ierr;
    if (type == MPIU_INT) {
      mess = (void*)(((PetscInt*)mess) + CHUNKSIZE);
    } else if (type == MPIU_SCALAR) {
      mess = (void*)(((PetscScalar*)mess) + CHUNKSIZE);
    } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
  }
  return 0;
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
PetscErrorCode MatLoad_MPIAIJ(Mat newMat,PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners,sizesset=1;
  int            fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
  if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /* determine ownership of all rows */
  if (newMat->rmap->n < 0) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* set by user */

  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for the process with the most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
j<m; j++) {
3002 procsnz[0] += ourlens[j];
3003 }
3004 for (i=1; i<size; i++) {
3005 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3006 /* calculate the number of nonzeros on each processor */
3007 for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3008 procsnz[i] += rowlengths[j];
3009 }
3010 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3011 }
3012 ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3013 } else {
3014 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3015 }
3016
3017 if (!rank) {
3018 /* determine max buffer needed and allocate it */
3019 maxnz = 0;
3020 for (i=0; i<size; i++) {
3021 maxnz = PetscMax(maxnz,procsnz[i]);
3022 }
3023 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3024
3025 /* read in my part of the matrix column indices */
3026 nz = procsnz[0];
3027 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3028 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3029
3030 /* read in everyone else's part and ship it off */
3031 for (i=1; i<size; i++) {
3032 nz = procsnz[i];
3033 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3034 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3035 }
3036 ierr = PetscFree(cols);CHKERRQ(ierr);
3037 } else {
3038 /* determine buffer space needed for message */
3039 nz = 0;
3040 for (i=0; i<m; i++) {
3041 nz += ourlens[i];
3042 }
3043 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3044
3045 /* receive message of column indices */
3046 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3047 }
3048
3049 /* determine column ownership if matrix is not square */
3050 if (N != M) {
3051 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3052 else n = newMat->cmap->n;
3053 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3054 cstart = cend - n;
3055 } else {
3056 cstart = rstart;
3057 cend = rend;
3058 n = cend - cstart;
3059 }
3060
3061 /* loop over local rows, determining number of off-diagonal entries */
3062 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3063 jj = 0;
3064 for (i=0; i<m; i++) {
3065 for (j=0; j<ourlens[i]; j++) {
3066 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3067 jj++;
3068 }
3069 }
3070
3071 for (i=0; i<m; i++) {
3072 ourlens[i] -= offlens[i];
3073 }
3074 if (!sizesset) {
3075 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3076 }
3077 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3078
3079 for (i=0; i<m; i++) {
3080 ourlens[i] += offlens[i];
3081 }
3082
3083 if (!rank) {
3084 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3085
3086 /* read in my part of the matrix numerical values */
3087 nz = procsnz[0];
3088 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3089
3090 /* insert into matrix */
3091 jj = rstart;
3092 smycols = mycols;
3093 svals = vals;
3094 for (i=0; i<m; i++) {
3095 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3096 smycols += ourlens[i];
3097 svals += ourlens[i];
3098 jj++;
3099 }
3100
3101 /* read in the other processors' parts and ship them out */
3102 for (i=1; i<size; i++) {
3103 nz = procsnz[i];
3104 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3105 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3106 }
3107 ierr = PetscFree(procsnz);CHKERRQ(ierr);
3108 } else {
3109 /* receive numeric values */
3110 ierr
= PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3111
3112 /* receive message of values */
3113 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3114
3115 /* insert into matrix */
3116 jj = rstart;
3117 smycols = mycols;
3118 svals = vals;
3119 for (i=0; i<m; i++) {
3120 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3121 smycols += ourlens[i];
3122 svals += ourlens[i];
3123 jj++;
3124 }
3125 }
3126 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3127 ierr = PetscFree(vals);CHKERRQ(ierr);
3128 ierr = PetscFree(mycols);CHKERRQ(ierr);
3129 ierr = PetscFree(rowners);CHKERRQ(ierr);
3130
3131 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3132 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3133 PetscFunctionReturn(0);
3134 }
3135
3136 #undef __FUNCT__
3137 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3138 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3139 {
3140 PetscErrorCode ierr;
3141 IS iscol_local;
3142 PetscInt csize;
3143
3144 PetscFunctionBegin;
3145 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3146 if (call == MAT_REUSE_MATRIX) {
3147 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3148 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3149 } else {
3150 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3151 }
3152 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3153 if (call == MAT_INITIAL_MATRIX) {
3154 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3155 ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3156 }
3157 PetscFunctionReturn(0);
3158 }
3159
3160 #undef __FUNCT__
3161 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3162 /*
3163 Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3164 process, and then the final result obtained by concatenating the local matrices.
3165 Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
3166
3167 Note: This requires a sequential iscol containing all the column indices.
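
   A minimal usage sketch of the public entry point above (hypothetical index sets;
   the MAT_REUSE_MATRIX call must be given the matrix produced by the initial call):

       IS  isrow,iscol;                       hypothetical parallel index sets
       Mat sub;
       ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
       ... change values in mat, keeping the same nonzero pattern ...
       ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);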
3168 */ 3169 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3170 { 3171 PetscErrorCode ierr; 3172 PetscMPIInt rank,size; 3173 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3174 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3175 Mat *local,M,Mreuse; 3176 MatScalar *vwork,*aa; 3177 MPI_Comm comm = ((PetscObject)mat)->comm; 3178 Mat_SeqAIJ *aij; 3179 3180 3181 PetscFunctionBegin; 3182 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3183 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3184 3185 if (call == MAT_REUSE_MATRIX) { 3186 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3187 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3188 local = &Mreuse; 3189 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3190 } else { 3191 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3192 Mreuse = *local; 3193 ierr = PetscFree(local);CHKERRQ(ierr); 3194 } 3195 3196 /* 3197 m - number of local rows 3198 n - number of columns (same on all processors) 3199 rstart - first row in new global matrix generated 3200 */ 3201 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3202 if (call == MAT_INITIAL_MATRIX) { 3203 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3204 ii = aij->i; 3205 jj = aij->j; 3206 3207 /* 3208 Determine the number of non-zeros in the diagonal and off-diagonal 3209 portions of the matrix in order to do correct preallocation 3210 */ 3211 3212 /* first get start and end of "diagonal" columns */ 3213 if (csize == PETSC_DECIDE) { 3214 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3215 if (mglobal == n) { /* square matrix */ 3216 nlocal = m; 3217 } else { 3218 nlocal = n/size + ((n % size) > rank); 3219 } 3220 } else { 3221 nlocal = csize; 3222 } 3223 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3224 rstart = rend - nlocal; 3225 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3226 3227 /* next, compute all the lengths */ 3228 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3229 olens = dlens + m; 3230 for (i=0; i<m; i++) { 3231 jend = ii[i+1] - ii[i]; 3232 olen = 0; 3233 dlen = 0; 3234 for (j=0; j<jend; j++) { 3235 if (*jj < rstart || *jj >= rend) olen++; 3236 else dlen++; 3237 jj++; 3238 } 3239 olens[i] = olen; 3240 dlens[i] = dlen; 3241 } 3242 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3243 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3244 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3245 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3246 ierr = PetscFree(dlens);CHKERRQ(ierr); 3247 } else { 3248 PetscInt ml,nl; 3249 3250 M = *newmat; 3251 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3252 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3253 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3254 /* 3255 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3256 rather than the slower MatSetValues(). 
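 (Presumably the flags work as follows: marking M as previously assembled but
 currently unassembled makes the subsequent assembly a re-assembly that reuses the
 already-built parallel data structures, so values can be inserted directly without
 the setup cost of a first-time assembly.)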
3257 */
3258 M->was_assembled = PETSC_TRUE;
3259 M->assembled = PETSC_FALSE;
3260 }
3261 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3262 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3263 ii = aij->i;
3264 jj = aij->j;
3265 aa = aij->a;
3266 for (i=0; i<m; i++) {
3267 row = rstart + i;
3268 nz = ii[i+1] - ii[i];
3269 cwork = jj; jj += nz;
3270 vwork = aa; aa += nz;
3271 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3272 }
3273
3274 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3275 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3276 *newmat = M;
3277
3278 /* save submatrix used in processor for next request */
3279 if (call == MAT_INITIAL_MATRIX) {
3280 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3281 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3282 }
3283
3284 PetscFunctionReturn(0);
3285 }
3286
3287 EXTERN_C_BEGIN
3288 #undef __FUNCT__
3289 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3290 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3291 {
3292 PetscInt m,cstart,cend,j,nnz,i,d;
3293 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3294 const PetscInt *JJ;
3295 PetscScalar *values;
3296 PetscErrorCode ierr;
3297
3298 PetscFunctionBegin;
3299 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3300
3301 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3302 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3303 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3304 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3305 m = B->rmap->n;
3306 cstart = B->cmap->rstart;
3307 cend = B->cmap->rend;
3308 rstart = B->rmap->rstart;
3309
3310 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3311
3312 #if defined(PETSC_USE_DEBUG)
3313 for (i=0; i<m; i++) {
3314 nnz = Ii[i+1]- Ii[i];
3315 JJ = J + Ii[i];
3316 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3317 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3318 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3319 }
3320 #endif
3321
3322 for (i=0; i<m; i++) {
3323 nnz = Ii[i+1]- Ii[i];
3324 JJ = J + Ii[i];
3325 nnz_max = PetscMax(nnz_max,nnz);
3326 d = 0;
3327 for (j=0; j<nnz; j++) {
3328 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3329 }
3330 d_nnz[i] = d;
3331 o_nnz[i] = nnz - d;
3332 }
3333 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3334 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3335
3336 if (v) values = (PetscScalar*)v;
3337 else {
3338 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3339 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3340 }
3341
3342 for (i=0; i<m; i++) {
3343 ii = i + rstart;
3344 nnz = Ii[i+1]- Ii[i];
3345 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3346 }
3347 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3348 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3349
3350 if (!v) {
3351 ierr = PetscFree(values);CHKERRQ(ierr);
3352 }
3353 PetscFunctionReturn(0);
3354 }
3355 EXTERN_C_END
3356
3357 #undef __FUNCT__
3358 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3359 /*@
3360 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3361 (the default parallel PETSc format).
3362
3363 Collective on MPI_Comm
3364
3365 Input Parameters:
3366 + B - the matrix
3367 . i - the indices into j for the start of each local row (starts with zero)
3368 . j - the column indices for each local row (starts with zero)
3369 - v - optional values in the matrix
3370
3371 Level: developer
3372
3373 Notes:
3374 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3375 thus you CANNOT change the matrix entries by changing the values of v[] after you have
3376 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3377
3378 The i and j indices are 0 based, and the i indices are offsets into the local j array.
3379
3380 The format used for the sparse matrix input is equivalent to a
3381 row-major ordering, i.e., for the following matrix, the input data expected is
3382 as shown:
3383
3384 1 0 0
3385 2 0 3 P0
3386 -------
3387 4 5 6 P1
3388
3389 Process0 [P0]: rows_owned=[0,1]
3390 i = {0,1,3} [size = nrow+1 = 2+1]
3391 j = {0,0,2} [size = nz = 3]
3392 v = {1,2,3} [size = nz = 3]
3393
3394 Process1 [P1]: rows_owned=[2]
3395 i = {0,3} [size = nrow+1 = 1+1]
3396 j = {0,1,2} [size = nz = 3]
3397 v = {4,5,6} [size = nz = 3]
3398
3399 .keywords: matrix, aij, compressed row, sparse, parallel
3400
3401 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3402 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3403 @*/
3404 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3405 {
3406 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3407
3408 PetscFunctionBegin;
3409 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3410 if (f) {
3411 ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3412 }
3413 PetscFunctionReturn(0);
3414 }
3415
3416 #undef __FUNCT__
3417 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3418 /*@C
3419 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3420 (the default parallel PETSc format). For good matrix assembly performance
3421 the user should preallocate the matrix storage by setting the parameters
3422 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3423 performance can be increased by more than a factor of 50.
3424
3425 Collective on MPI_Comm
3426
3427 Input Parameters:
3428 + A - the matrix
3429 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3430 (same value is used for all local rows)
3431 . d_nnz - array containing the number of nonzeros in the various rows of the
3432 DIAGONAL portion of the local submatrix (possibly different for each row)
3433 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3434 The size of this array is equal to the number of local rows, i.e., 'm'.
3435 You must leave room for the diagonal entry even if it is zero.
3436 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3437 submatrix (same value is used for all local rows).
3438 - o_nnz - array containing the number of nonzeros in the various rows of the
3439 OFF-DIAGONAL portion of the local submatrix (possibly different for
3440 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3441 structure. The size of this array is equal to the number
3442 of local rows, i.e., 'm'.
3443
3444 If the *_nnz parameter is given then the *_nz parameter is ignored
3445
3446 The AIJ format (also called the Yale sparse matrix format or
3447 compressed row storage (CSR)) is fully compatible with standard Fortran 77
3448 storage. The stored row and column indices begin with zero.
3449 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3450
3451 The parallel matrix is partitioned such that the first m0 rows belong to
3452 process 0, the next m1 rows belong to process 1, the next m2 rows belong
3453 to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3454
3455 The DIAGONAL portion of the local submatrix of a processor can be defined
3456 as the submatrix which is obtained by extracting the part corresponding to
3457 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3458 first row that belongs to the processor, r2 is the last row belonging to
3459 this processor, and c1-c2 is the range of indices of the local part of a
3460 vector suitable for applying the matrix to. This is an mxn matrix. In the
3461 common case of a square matrix, the row and column ranges are the same and
3462 the DIAGONAL part is also square. The remaining portion of the local
3463 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3464
3465 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3466
3467 You can call MatGetInfo() to get information on how effective the preallocation was;
3468 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3469 You can also run with the option -info and look for messages containing the string
3470 malloc to see if additional memory allocation was needed.
3471
3472 Example usage:
3473
3474 Consider the following 8x8 matrix with 34 non-zero values, that is
3475 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3476 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3477 as follows:
3478
3479 .vb
3480 1 2 0 | 0 3 0 | 0 4
3481 Proc0 0 5 6 | 7 0 0 | 8 0
3482 9 0 10 | 11 0 0 | 12 0
3483 -------------------------------------
3484 13 0 14 | 15 16 17 | 0 0
3485 Proc1 0 18 0 | 19 20 21 | 0 0
3486 0 0 0 | 22 23 0 | 24 0
3487 -------------------------------------
3488 Proc2 25 26 27 | 0 0 28 | 29 0
3489 30 0 0 | 31 32 33 | 0 34
3490 .ve
3491
3492 This can be represented as a collection of submatrices as:
3493
3494 .vb
3495 A B C
3496 D E F
3497 G H I
3498 .ve
3499
3500 Where the submatrices A,B,C are owned by proc0, D,E,F are
3501 owned by proc1, G,H,I are owned by proc2.
3502
3503 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3504 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3505 The 'M','N' parameters are 8,8, and have the same values on all procs.
3506
3507 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3508 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3509 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3510 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3511 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
3512 matrix and [DF] as another SeqAIJ matrix.
3513
3514 When the d_nz, o_nz parameters are specified, d_nz storage elements are
3515 allocated for every row of the local diagonal submatrix, and o_nz
3516 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3517 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3518 the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3519 In this case, the values of d_nz,o_nz are:
3520 .vb
3521 proc0 : d_nz = 2, o_nz = 2
3522 proc1 : d_nz = 3, o_nz = 2
3523 proc2 : d_nz = 1, o_nz = 4
3524 .ve
3525 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3526 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3527 for proc2, i.e., we are using 12+15+10=37 storage locations to store
3528 34 values.
3529
3530 When the d_nnz, o_nnz parameters are specified, the storage is specified
3531 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3532 In the above case the values for d_nnz,o_nnz are:
3533 .vb
3534 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3535 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3536 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3537 .ve
3538 Here the space allocated is the sum of all the above values, i.e., 34, and
3539 hence the preallocation is perfect.
3540
3541 Level: intermediate
3542
3543 .keywords: matrix, aij, compressed row, sparse, parallel
3544
3545 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3546 MPIAIJ, MatGetInfo()
3547 @*/
3548 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3549 {
3550 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3551
3552 PetscFunctionBegin;
3553 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3554 if (f) {
3555 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3556 }
3557 PetscFunctionReturn(0);
3558 }
3559
3560 #undef __FUNCT__
3561 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3562 /*@
3563 MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the local rows
3564 in standard CSR format.
3565
3566 Collective on MPI_Comm
3567
3568 Input Parameters:
3569 + comm - MPI communicator
3570 . m - number of local rows (cannot be PETSC_DECIDE)
3571 . n - This value should be the same as the local size used in creating the
3572 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3573 calculated if N is given). For square matrices n is almost always m.
3574 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3575 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3576 . i - row indices
3577 . j - column indices
3578 - a - matrix values
3579
3580 Output Parameter:
3581 . mat - the matrix
3582
3583 Level: intermediate
3584
3585 Notes:
3586 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3587 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3588 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3589
3590 The i and j indices are 0 based, and the i indices are offsets into the local j array.
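
 A minimal calling sketch (hypothetical two-row local CSR arrays, matching process 0
 of the example below; every process makes the same call with its own rows):
.vb
      PetscInt    i[] = {0,1,3};
      PetscInt    j[] = {0,0,2};
      PetscScalar a[] = {1.0,2.0,3.0};
      Mat         A;
      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
.ve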
3591
3592 The format used for the sparse matrix input is equivalent to a
3593 row-major ordering, i.e., for the following matrix, the input data expected is
3594 as shown:
3595
3596 1 0 0
3597 2 0 3 P0
3598 -------
3599 4 5 6 P1
3600
3601 Process0 [P0]: rows_owned=[0,1]
3602 i = {0,1,3} [size = nrow+1 = 2+1]
3603 j = {0,0,2} [size = nz = 3]
3604 v = {1,2,3} [size = nz = 3]
3605
3606 Process1 [P1]: rows_owned=[2]
3607 i = {0,3} [size = nrow+1 = 1+1]
3608 j = {0,1,2} [size = nz = 3]
3609 v = {4,5,6} [size = nz = 3]
3610
3611 .keywords: matrix, aij, compressed row, sparse, parallel
3612
3613 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3614 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3615 @*/
3616 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3617 {
3618 PetscErrorCode ierr;
3619
3620 PetscFunctionBegin;
3621 if (i[0]) {
3622 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3623 }
3624 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3625 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3626 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3627 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3628 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3629 PetscFunctionReturn(0);
3630 }
3631
3632 #undef __FUNCT__
3633 #define __FUNCT__ "MatCreateMPIAIJ"
3634 /*@C
3635 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3636 (the default parallel PETSc format). For good matrix assembly performance
3637 the user should preallocate the matrix storage by setting the parameters
3638 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3639 performance can be increased by more than a factor of 50.
3640
3641 Collective on MPI_Comm
3642
3643 Input Parameters:
3644 + comm - MPI communicator
3645 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
3646 This value should be the same as the local size used in creating the
3647 y vector for the matrix-vector product y = Ax.
3648 . n - This value should be the same as the local size used in creating the
3649 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3650 calculated if N is given). For square matrices n is almost always m.
3651 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3652 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3653 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3654 (same value is used for all local rows)
3655 . d_nnz - array containing the number of nonzeros in the various rows of the
3656 DIAGONAL portion of the local submatrix (possibly different for each row)
3657 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3658 The size of this array is equal to the number of local rows, i.e., 'm'.
3659 You must leave room for the diagonal entry even if it is zero.
3660 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3661 submatrix (same value is used for all local rows).
3662 - o_nnz - array containing the number of nonzeros in the various rows of the
3663 OFF-DIAGONAL portion of the local submatrix (possibly different for
3664 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3665 structure. The size of this array is equal to the number
3666 of local rows, i.e., 'm'.
3667
3668 Output Parameter:
3669 . A - the matrix
3670
3671 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3672 MatXXXXSetPreallocation() paradigm instead of this routine directly.
3673 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3674
3675 Notes:
3676 If the *_nnz parameter is given then the *_nz parameter is ignored
3677
3678 The m,n,M,N parameters specify the size of the matrix, and its partitioning across
3679 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3680 storage requirements for this matrix.
3681
3682 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3683 processor then it must be used on all processors that share the object for
3684 that argument.
3685
3686 The user MUST specify either the local or global matrix dimensions
3687 (possibly both).
3688
3689 The parallel matrix is partitioned across processors such that the
3690 first m0 rows belong to process 0, the next m1 rows belong to
3691 process 1, the next m2 rows belong to process 2, etc., where
3692 m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
3693 values corresponding to an [m x N] submatrix.
3694
3695 The columns are logically partitioned with the n0 columns belonging
3696 to the 0th partition, the next n1 columns belonging to the next
3697 partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
3698
3699 The DIAGONAL portion of the local submatrix on any given processor
3700 is the submatrix corresponding to the rows and columns owned by
3701 that processor, i.e., the diagonal matrix on
3702 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3703 etc. The remaining portion of the local submatrix, [m x (N-n)],
3704 constitutes the OFF-DIAGONAL portion. The example below
3705 illustrates this concept.
3706
3707 For a square global matrix we define each processor's diagonal portion
3708 to be its local rows and the corresponding columns (a square submatrix);
3709 each processor's off-diagonal portion encompasses the remainder of the
3710 local matrix (a rectangular submatrix).
3711
3712 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3713
3714 When calling this routine with a single process communicator, a matrix of
3715 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
3716 type of communicator, use the construction mechanism:
3717 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3718
3719 By default, this format uses inodes (identical nodes) when possible.
3720 We search for consecutive rows with the same nonzero structure, thereby
3721 reusing matrix information to achieve increased efficiency.
3722
3723 Options Database Keys:
3724 + -mat_no_inode - Do not use inodes
3725 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3726 - -mat_aij_oneindex - Internally use indexing starting at 1
3727 rather than 0. Note that when calling MatSetValues(),
3728 the user still MUST index entries starting at 0!
3729
3730
3731 Example usage:
3732
3733 Consider the following 8x8 matrix with 34 non-zero values, that is
3734 assembled across 3 processors.
Let's assume that proc0 owns 3 rows,
3735 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3736 as follows:
3737
3738 .vb
3739 1 2 0 | 0 3 0 | 0 4
3740 Proc0 0 5 6 | 7 0 0 | 8 0
3741 9 0 10 | 11 0 0 | 12 0
3742 -------------------------------------
3743 13 0 14 | 15 16 17 | 0 0
3744 Proc1 0 18 0 | 19 20 21 | 0 0
3745 0 0 0 | 22 23 0 | 24 0
3746 -------------------------------------
3747 Proc2 25 26 27 | 0 0 28 | 29 0
3748 30 0 0 | 31 32 33 | 0 34
3749 .ve
3750
3751 This can be represented as a collection of submatrices as:
3752
3753 .vb
3754 A B C
3755 D E F
3756 G H I
3757 .ve
3758
3759 Where the submatrices A,B,C are owned by proc0, D,E,F are
3760 owned by proc1, G,H,I are owned by proc2.
3761
3762 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3763 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3764 The 'M','N' parameters are 8,8, and have the same values on all procs.
3765
3766 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3767 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3768 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3769 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3770 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
3771 matrix and [DF] as another SeqAIJ matrix.
3772
3773 When the d_nz, o_nz parameters are specified, d_nz storage elements are
3774 allocated for every row of the local diagonal submatrix, and o_nz
3775 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3776 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3777 the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3778 In this case, the values of d_nz,o_nz are:
3779 .vb
3780 proc0 : d_nz = 2, o_nz = 2
3781 proc1 : d_nz = 3, o_nz = 2
3782 proc2 : d_nz = 1, o_nz = 4
3783 .ve
3784 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3785 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3786 for proc2, i.e., we are using 12+15+10=37 storage locations to store
3787 34 values.
3788
3789 When the d_nnz, o_nnz parameters are specified, the storage is specified
3790 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3791 In the above case the values for d_nnz,o_nnz are:
3792 .vb
3793 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3794 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3795 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3796 .ve
3797 Here the space allocated is the sum of all the above values, i.e., 34, and
3798 hence the preallocation is perfect.
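
 A calling sketch for the example above (a minimal sketch showing process 0's call;
 proc1 and proc2 make the same call with their own m, n, d_nnz, and o_nnz):
.vb
      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      Mat      A;
      ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve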
3799 3800 Level: intermediate 3801 3802 .keywords: matrix, aij, compressed row, sparse, parallel 3803 3804 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3805 MPIAIJ, MatCreateMPIAIJWithArrays() 3806 @*/ 3807 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3808 { 3809 PetscErrorCode ierr; 3810 PetscMPIInt size; 3811 3812 PetscFunctionBegin; 3813 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3814 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3815 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3816 if (size > 1) { 3817 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3818 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3819 } else { 3820 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3821 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3822 } 3823 PetscFunctionReturn(0); 3824 } 3825 3826 #undef __FUNCT__ 3827 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3828 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3829 { 3830 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3831 3832 PetscFunctionBegin; 3833 *Ad = a->A; 3834 *Ao = a->B; 3835 *colmap = a->garray; 3836 PetscFunctionReturn(0); 3837 } 3838 3839 #undef __FUNCT__ 3840 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3841 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3842 { 3843 PetscErrorCode ierr; 3844 PetscInt i; 3845 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3846 3847 PetscFunctionBegin; 3848 if (coloring->ctype == IS_COLORING_GLOBAL) { 3849 ISColoringValue *allcolors,*colors; 3850 ISColoring ocoloring; 3851 3852 /* set coloring for diagonal portion */ 3853 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3854 3855 /* set coloring for off-diagonal portion */ 3856 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3857 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3858 for (i=0; i<a->B->cmap->n; i++) { 3859 colors[i] = allcolors[a->garray[i]]; 3860 } 3861 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3862 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3863 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3864 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3865 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3866 ISColoringValue *colors; 3867 PetscInt *larray; 3868 ISColoring ocoloring; 3869 3870 /* set coloring for diagonal portion */ 3871 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3872 for (i=0; i<a->A->cmap->n; i++) { 3873 larray[i] = i + A->cmap->rstart; 3874 } 3875 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3876 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3877 for (i=0; i<a->A->cmap->n; i++) { 3878 colors[i] = coloring->colors[larray[i]]; 3879 } 3880 ierr = PetscFree(larray);CHKERRQ(ierr); 3881 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3882 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3883 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3884 3885 /* set coloring for off-diagonal portion */ 3886 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3887 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3888 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3889 for (i=0; i<a->B->cmap->n; i++) {
3890 colors[i] = coloring->colors[larray[i]];
3891 }
3892 ierr = PetscFree(larray);CHKERRQ(ierr);
3893 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3894 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3895 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3896 } else {
3897 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
3898 }
3899
3900 PetscFunctionReturn(0);
3901 }
3902
3903 #if defined(PETSC_HAVE_ADIC)
3904 #undef __FUNCT__
3905 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3906 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3907 {
3908 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3909 PetscErrorCode ierr;
3910
3911 PetscFunctionBegin;
3912 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3913 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3914 PetscFunctionReturn(0);
3915 }
3916 #endif
3917
3918 #undef __FUNCT__
3919 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3920 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3921 {
3922 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3923 PetscErrorCode ierr;
3924
3925 PetscFunctionBegin;
3926 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3927 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3928 PetscFunctionReturn(0);
3929 }
3930
3931 #undef __FUNCT__
3932 #define __FUNCT__ "MatMerge"
3933 /*@
3934 MatMerge - Creates a single large PETSc matrix by concatenating the sequential
3935 matrices from each processor
3936
3937 Collective on MPI_Comm
3938
3939 Input Parameters:
3940 + comm - the communicator the parallel matrix will live on
3941 . inmat - the input sequential matrix (one per process)
3942 . n - number of local columns (or PETSC_DECIDE)
3943 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3944
3945 Output Parameter:
3946 . outmat - the parallel matrix generated
3947
3948 Level: advanced
3949
3950 Notes: The number of columns of the matrix in EACH processor MUST be the same.
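
 MatMerge() destroys inmat after its values have been copied into outmat (note the
 MatDestroy() call in the body below), so the caller must not use or destroy inmat
 afterwards.

 Example usage (a minimal sketch; seqA is the SeqAIJ piece built on each process,
 all with the same number of columns):
.vb
      Mat seqA,parA;
      ierr = MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parA);CHKERRQ(ierr);
.ve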
3951 3952 @*/ 3953 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3954 { 3955 PetscErrorCode ierr; 3956 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3957 PetscInt *indx; 3958 PetscScalar *values; 3959 3960 PetscFunctionBegin; 3961 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3962 if (scall == MAT_INITIAL_MATRIX){ 3963 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3964 if (n == PETSC_DECIDE){ 3965 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3966 } 3967 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3968 rstart -= m; 3969 3970 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3971 for (i=0;i<m;i++) { 3972 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3973 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3974 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3975 } 3976 /* This routine will ONLY return MPIAIJ type matrix */ 3977 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3978 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3979 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3980 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3981 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3982 3983 } else if (scall == MAT_REUSE_MATRIX){ 3984 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3985 } else { 3986 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3987 } 3988 3989 for (i=0;i<m;i++) { 3990 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3991 Ii = i + rstart; 3992 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3993 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3994 } 3995 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3996 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3997 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3998 3999 PetscFunctionReturn(0); 4000 } 4001 4002 #undef __FUNCT__ 4003 #define __FUNCT__ "MatFileSplit" 4004 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4005 { 4006 PetscErrorCode ierr; 4007 PetscMPIInt rank; 4008 PetscInt m,N,i,rstart,nnz; 4009 size_t len; 4010 const PetscInt *indx; 4011 PetscViewer out; 4012 char *name; 4013 Mat B; 4014 const PetscScalar *values; 4015 4016 PetscFunctionBegin; 4017 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4018 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4019 /* Should this be the type of the diagonal block of A? 
*/
4020 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4021 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4022 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4023 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4024 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4025 for (i=0;i<m;i++) {
4026 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4027 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4028 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4029 }
4030 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4031 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4032
4033 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4034 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4035 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4036 sprintf(name,"%s.%d",outfile,rank);
4037 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4038 ierr = PetscFree(name);CHKERRQ(ierr);
4039 ierr = MatView(B,out);CHKERRQ(ierr);
4040 ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4041 ierr = MatDestroy(B);CHKERRQ(ierr);
4042 PetscFunctionReturn(0);
4043 }
4044
4045 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
4046 #undef __FUNCT__
4047 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4048 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4049 {
4050 PetscErrorCode ierr;
4051 Mat_Merge_SeqsToMPI *merge;
4052 PetscContainer container;
4053
4054 PetscFunctionBegin;
4055 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4056 if (container) {
4057 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4058 ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4059 ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4060 ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4061 ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4062 ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4063 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4064 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4065 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4066 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4067 ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4068 ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4069 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4070 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4071
4072 ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4073 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4074 ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container exists */
4075 }
4076
4077 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4078 PetscFunctionReturn(0);
4079 }
4080
4081 #include "../src/mat/utils/freespace.h"
4082 #include "petscbt.h"
4083
4084 #undef __FUNCT__
4085 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4086 /*@C
4087 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding the sequential
4088 matrices from each processor
4089
4090 Collective on MPI_Comm
4091
4092 Input Parameters:
4093 + comm - the communicator the parallel matrix will live on
4094 . seqmat - the input sequential matrix (one per process)
4095 . m - number of local rows (or PETSC_DECIDE)
4096 . n - number of local columns (or PETSC_DECIDE)
4097 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4098
4099 Output Parameter:
4100 . mpimat - the parallel matrix generated
4101
4102 Level: advanced
4103
4104 Notes:
4105 The dimensions of the sequential matrix in each processor MUST be the same.
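 For MAT_INITIAL_MATRIX the usual driver is the symbolic/numeric pair wrapped by
 MatMerge_SeqsToMPI() below; a minimal sketch:
.vb
      ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
      ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve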
4106 The input seqmat is kept in the container "Mat_Merge_SeqsToMPI", and will be
4107 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4108 @*/
4109 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4110 {
4111 PetscErrorCode ierr;
4112 MPI_Comm comm=((PetscObject)mpimat)->comm;
4113 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4114 PetscMPIInt size,rank,taga,*len_s;
4115 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4116 PetscInt proc,m;
4117 PetscInt **buf_ri,**buf_rj;
4118 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4119 PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
4120 MPI_Request *s_waits,*r_waits;
4121 MPI_Status *status;
4122 MatScalar *aa=a->a;
4123 MatScalar **abuf_r,*ba_i;
4124 Mat_Merge_SeqsToMPI *merge;
4125 PetscContainer container;
4126
4127 PetscFunctionBegin;
4128 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4129
4130 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4131 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4132
4133 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4134 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"mpimat must have been created by MatMerge_SeqsToMPISymbolic()");
4135 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4136
4137 bi = merge->bi;
4138 bj = merge->bj;
4139 buf_ri = merge->buf_ri;
4140 buf_rj = merge->buf_rj;
4141
4142 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4143 owners = merge->rowmap->range;
4144 len_s = merge->len_s;
4145
4146 /* send and recv matrix values */
4147 /*-----------------------------*/
4148 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4149 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4150
4151 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4152 for (proc=0,k=0; proc<size; proc++){
4153 if (!len_s[proc]) continue;
4154 i = owners[proc];
4155 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4156 k++;
4157 }
4158
4159 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4160 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4161 ierr = PetscFree(status);CHKERRQ(ierr);
4162
4163 ierr = PetscFree(s_waits);CHKERRQ(ierr);
4164 ierr = PetscFree(r_waits);CHKERRQ(ierr);
4165
4166 /* insert mat values of mpimat */
4167 /*----------------------------*/
4168 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4169 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4170
4171 for (k=0; k<merge->nrecv; k++){
4172 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4173 nrows = *(buf_ri_k[k]);
4174 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */
4175 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4176 }
4177
4178 /* set values of ba */
4179 m = merge->rowmap->n;
4180 for (i=0; i<m; i++) {
4181 arow = owners[rank] + i;
4182 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
4183 bnzi = bi[i+1] - bi[i];
4184 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4185
4186 /* add local non-zero vals of this proc's seqmat into ba */
4187 anzi = ai[arow+1] - ai[arow];
4188 aj = a->j + ai[arow];
4189 aa = a->a + ai[arow];
4190 nextaj = 0;
4191 for (j=0; nextaj<anzi; j++){
4192 if (*(bj_i
+ j) == aj[nextaj]){ /* bcol == acol */ 4193 ba_i[j] += aa[nextaj++]; 4194 } 4195 } 4196 4197 /* add received vals into ba */ 4198 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4199 /* i-th row */ 4200 if (i == *nextrow[k]) { 4201 anzi = *(nextai[k]+1) - *nextai[k]; 4202 aj = buf_rj[k] + *(nextai[k]); 4203 aa = abuf_r[k] + *(nextai[k]); 4204 nextaj = 0; 4205 for (j=0; nextaj<anzi; j++){ 4206 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4207 ba_i[j] += aa[nextaj++]; 4208 } 4209 } 4210 nextrow[k]++; nextai[k]++; 4211 } 4212 } 4213 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4214 } 4215 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4216 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4217 4218 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4219 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4220 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4221 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4222 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4223 PetscFunctionReturn(0); 4224 } 4225 4226 #undef __FUNCT__ 4227 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4228 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4229 { 4230 PetscErrorCode ierr; 4231 Mat B_mpi; 4232 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4233 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4234 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4235 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4236 PetscInt len,proc,*dnz,*onz; 4237 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4238 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4239 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4240 MPI_Status *status; 4241 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4242 PetscBT lnkbt; 4243 Mat_Merge_SeqsToMPI *merge; 4244 PetscContainer container; 4245 4246 PetscFunctionBegin; 4247 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4248 4249 /* make sure it is a PETSc comm */ 4250 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4251 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4252 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4253 4254 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4255 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4256 4257 /* determine row ownership */ 4258 /*---------------------------------------------------------*/ 4259 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4260 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4261 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4262 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4263 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4264 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4265 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4266 4267 m = merge->rowmap->n; 4268 M = merge->rowmap->N; 4269 owners = merge->rowmap->range; 4270 4271 /* determine the number of messages to send, their lengths */ 4272 /*---------------------------------------------------------*/ 4273 len_s = merge->len_s; 4274 4275 len = 0; /* length of buf_si[] */ 4276 merge->nsend = 0; 4277 for (proc=0; proc<size; proc++){ 4278 len_si[proc] = 0; 4279 if (proc == rank){ 4280 len_s[proc] = 0; 4281 } else { 4282 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4283 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros (j entries) to be sent to [proc] */
4284 }
4285 if (len_s[proc]) {
4286 merge->nsend++;
4287 nrows = 0;
4288 for (i=owners[proc]; i<owners[proc+1]; i++){
4289 if (ai[i+1] > ai[i]) nrows++;
4290 }
4291 len_si[proc] = 2*(nrows+1);
4292 len += len_si[proc];
4293 }
4294 }
4295
4296 /* determine the number and length of messages to receive for ij-structure */
4297 /*-------------------------------------------------------------------------*/
4298 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4299 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4300
4301 /* post the Irecv of j-structure */
4302 /*-------------------------------*/
4303 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4304 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4305
4306 /* post the Isend of j-structure */
4307 /*--------------------------------*/
4308 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4309
4310 for (proc=0, k=0; proc<size; proc++){
4311 if (!len_s[proc]) continue;
4312 i = owners[proc];
4313 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4314 k++;
4315 }
4316
4317 /* receives and sends of j-structure are complete */
4318 /*------------------------------------------------*/
4319 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4320 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4321
4322 /* send and recv i-structure */
4323 /*---------------------------*/
4324 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4325 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4326
4327 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4328 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4329 for (proc=0,k=0; proc<size; proc++){
4330 if (!len_s[proc]) continue;
4331 /* form outgoing message for i-structure:
4332 buf_si[0]: nrows to be sent
4333 [1:nrows]: row index (local)
4334 [nrows+1:2*nrows+1]: i-structure index
4335 */
4336 /*-------------------------------------------*/
4337 nrows = len_si[proc]/2 - 1;
4338 buf_si_i = buf_si + nrows+1;
4339 buf_si[0] = nrows;
4340 buf_si_i[0] = 0;
4341 nrows = 0;
4342 for (i=owners[proc]; i<owners[proc+1]; i++){
4343 anzi = ai[i+1] - ai[i];
4344 if (anzi) {
4345 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4346 buf_si[nrows+1] = i-owners[proc]; /* local row index */
4347 nrows++;
4348 }
4349 }
4350 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4351 k++;
4352 buf_si += len_si[proc];
4353 }
4354
4355 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4356 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4357
4358 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4359 for (i=0; i<merge->nrecv; i++){
4360 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4361 }
4362
4363 ierr = PetscFree(len_si);CHKERRQ(ierr);
4364 ierr = PetscFree(len_ri);CHKERRQ(ierr);
4365 ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4366 ierr =
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4367 ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4368 ierr = PetscFree(buf_s);CHKERRQ(ierr);
4369 ierr = PetscFree(status);CHKERRQ(ierr);
4370
4371 /* compute a local seq matrix in each processor */
4372 /*----------------------------------------------*/
4373 /* allocate bi array and free space for accumulating nonzero column info */
4374 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4375 bi[0] = 0;
4376
4377 /* create and initialize a linked list */
4378 nlnk = N+1;
4379 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4380
4381 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4382
4383 len = ai[owners[rank+1]] - ai[owners[rank]];
4384 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4385 current_space = free_space;
4386
4387 /* determine symbolic info for each local row */
4388 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4389
4390 for (k=0; k<merge->nrecv; k++){
4391 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4392 nrows = *buf_ri_k[k];
4393 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
4394 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4395 }
4396
4397 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4398 len = 0;
4399 for (i=0;i<m;i++) {
4400 bnzi = 0;
4401 /* add local non-zero cols of this proc's seqmat into lnk */
4402 arow = owners[rank] + i;
4403 anzi = ai[arow+1] - ai[arow];
4404 aj = a->j + ai[arow];
4405 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4406 bnzi += nlnk;
4407 /* add received col data into lnk */
4408 for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4409 if (i == *nextrow[k]) { /* i-th row */
4410 anzi = *(nextai[k]+1) - *nextai[k];
4411 aj = buf_rj[k] + *nextai[k];
4412 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4413 bnzi += nlnk;
4414 nextrow[k]++; nextai[k]++;
4415 }
4416 }
4417 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4418
4419 /* if free space is not available, make more free space */
4420 if (current_space->local_remaining<bnzi) {
4421 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4422 nspacedouble++;
4423 }
4424 /* copy data into free space, then initialize lnk */
4425 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4426 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4427
4428 current_space->array += bnzi;
4429 current_space->local_used += bnzi;
4430 current_space->local_remaining -= bnzi;
4431
4432 bi[i+1] = bi[i] + bnzi;
4433 }
4434
4435 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4436
4437 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4438 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4439 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4440
4441 /* create symbolic parallel matrix B_mpi */
4442 /*---------------------------------------*/
4443 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4444 if (n==PETSC_DECIDE) {
4445 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4446 } else {
4447 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4448 }
4449 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4450 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4451 ierr =
  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
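/*
   Editor's usage sketch (not part of the original source; names are illustrative).
   Each process contributes a sequential matrix seqmat whose rows are merged into
   one parallel MPIAIJ matrix; the symbolic structure can then be reused when only
   the numerical values change:

     Mat C;
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... update values in seqmat, keeping the same nonzero pattern ...
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/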
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@
     MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel matrix

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
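/*
   Editor's usage sketch (not part of the original source): extract the local
   rows of a parallel MPIAIJ matrix A as a sequential matrix, then refresh the
   values after A changes without rebuilding the structure.

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... A gets new numerical values, same nonzero pattern ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
*/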
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and the NON-ZERO columns of a parallel matrix

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
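/*
   Editor's usage sketch (not part of the original source): condense the local
   part of a parallel matrix A to its nonzero columns, letting the routine
   build the row and column index sets itself.

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
*/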
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
.    brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's local rows
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
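/*
   Editor's note (illustration, not in the original source): the row index set
   isrowb built above lists A's nonzero columns in ascending order. For
   example, if this process owns columns 4..6 (start = 4, nzA = 3) and its
   off-diagonal part touches global columns garray = {1,2,9}, then
   imark = 2 (= *brstart) and idx = {1, 2, 4, 5, 6, 9}.
*/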
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
    MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.    startsj - starting points in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.    startsj_r - similar to startsj, for the receiving j-arrays
-    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)

   Output Parameter:
.    B_oth - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=((PetscObject)ctx)->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;   /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
    sstartsj[0] = 0; rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++){
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
          rowlen[j*sbs+l] = ncols;
          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
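    /* Editor's note (illustration, not in the original source): sstartsj and
       rstartsj hold prefix offsets into bufj/bufa and b_othj/b_otha. E.g. two
       outgoing messages carrying 5 and 3 column entries give
       sstartsj = {0, 5, 8}. */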
    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse argument; expected MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the send buffer itself, not the (possibly NULL) output argument */
    } else {
      *startsj   = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
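/*
   Editor's usage sketch (not part of the original source): gather the rows of
   B matching the off-diagonal columns of A, keeping the communication buffers
   for a later value-only refresh.

     Mat       B_oth;
     PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... B gets new numerical values, same nonzero pattern ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
*/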
#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameter:
. A - The matrix in mpiaij format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes C = A*B as (B'*A')', since computing A*B directly is untenable

               n            p               p
         (          )     (    )        (    )
       m (    A     )  *  n( B )   =  m ( C  )
         (          )     (    )        (    )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatDestroy(Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
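/*
   Editor's note (illustration, not in the original source): the identity used
   above is C = A*B = (B'*A')'. E.g. with A = [1 2; 3 4] (dense) and
   B = [1 0; 0 2] (sparse), B'*A' = [1 3; 4 8], whose transpose is
   A*B = [1 4; 3 8].
*/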
EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data      = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                           "MatGetFactor_aij_mumps",
                                           MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                           "MatGetFactor_mpiaij_pastix",
                                           MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
                                           "MatConvert_MPIAIJ_MPIAIJPERM",
                                           MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
                                           "MatConvert_MPIAIJ_MPIAIJCRL",
                                           MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                           "MatConvert_MPIAIJ_MPISBAIJ",
                                           MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
     and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       You cannot later use MatSetValues() to change values in this matrix.
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
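/*
   Editor's usage sketch (not part of the original source): one row per process
   of the 2x2 global matrix [2 -1; -1 2] on two processes. The diagonal block
   uses local column indices, the off-diagonal block uses global ones; shown
   for rank 0 (rank 1 is analogous with j[] = {0}, oj[] = {0}):

     PetscInt    i[]  = {0,1}, j[]  = {0};  PetscScalar a[]  = {2.0};
     PetscInt    oi[] = {0,1}, oj[] = {1};  PetscScalar oa[] = {-1.0};
     Mat         C;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&C);CHKERRQ(ierr);
*/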
/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat = *mmat;
  PetscInt       m = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  ierr = MatPreallocated(mat);CHKERRQ(ierr);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                 note ba must be refreshed from the disassembled B before ap2 is recomputed */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }}
  PetscFunctionReturnVoid();
}
EXTERN_C_END