#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt rank,size;
  PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat mat;
  Mat_SeqAIJ *gmata;
  PetscMPIInt tag;
  MPI_Status status;
  PetscTruth aij;
  MatScalar *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
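/*
   Illustrative usage sketch (added comment, not part of the original source): process 0
   builds a sequential AIJ matrix and MatDistribute_MPIAIJ() spreads it over the
   communicator; the names gA, mlocal and A below are hypothetical.

     Mat gA = PETSC_NULL,A;
     if (!rank) { ... assemble gA as a MATSEQAIJ matrix on process 0 ... }
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gA,mlocal,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
     ... later, to keep the layout and only move new numerical values from process 0 ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gA,mlocal,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/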
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array, but it is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col; \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else low1 = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else ap1[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col; \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else low2 = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else ap2[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}
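/*
   Note added for clarity: the two macros above insert one (row,col,value) pair into the
   local diagonal (A) or off-diagonal (B) SeqAIJ block. A short binary search narrows the
   window [low,high) within the row, an existing entry is added to or overwritten, and
   otherwise the later entries in the row are shifted up one slot, reallocating through
   MatSeqXAIJReallocateAIJ() when the preallocated row is already full.
*/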
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
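/*
   Clarifying note (added): MatSetValuesRow_MPIAIJ() assumes the caller supplies the values
   of one locally owned row ordered by global column, i.e.
   v = [ off-diagonal entries left of the diagonal block | diagonal-block entries |
   off-diagonal entries to the right ], which is why it can copy three contiguous slices
   directly into b->a and a->a without any searching.
*/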
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;
  PetscErrorCode ierr;
  PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar *aa = a->a;
  PetscTruth ignorezeroentries = a->ignorezeroentries;
  Mat B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar *ba = b->a;

  PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      lastcol1 = -1;
      rp1 = aj + ai[row];
      ap1 = aa + ai[row];
      rmax1 = aimax[row];
      nrow1 = ailen[row];
      low1 = 0;
      high1 = nrow1;
      lastcol2 = -1;
      rp2 = bj + bi[row];
      ap2 = ba + bi[row];
      rmax2 = bimax[row];
      nrow2 = bilen[row];
      low2 = 0;
      high2 = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2 = bj + bi[row];
              ap2 = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2 = 0;
              high2 = nrow2;
              bm = aij->B->rmap->n;
              ba = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
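/*
   Clarifying note (added): entries whose row is not owned by this process are not inserted
   directly; MatSetValues_MPIAIJ() places them in mat->stash (unless MAT_IGNORE_OFF_PROC_ENTRIES
   was set) and they are shipped to their owners by the assembly routines below. A typical
   caller therefore always finishes insertion with

     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/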
extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt n;
  PetscInt i,j,rstart,ncols,flg;
  PetscInt *row,*col;
  PetscTruth other_disassembled;
  PetscScalar *val;
  InsertMode addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
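/*
   Note added for clarity: MatZeroRows_MPIAIJ() below accepts rows in global numbering on any
   process. Each process first determines which process owns each listed row (using
   A->rmap->range), ships those row numbers to their owners with MPI_Isend/MPI_Irecv, and
   then every process zeros only the rows it actually owns, optionally placing 'diag' on the
   diagonal.
*/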
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt i,*owners = A->rmap->range;
  PetscInt *nprocs,j,idx,nsends,row;
  PetscInt nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt *rvalues,count,base,slen,*source;
  PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm comm = ((PetscObject)A)->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex] = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);
  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
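/*
   Note added for clarity: the product above is formed as y = A_d*x_local + B_o*x_ghost,
   where A_d is the local diagonal block and B_o the off-diagonal block. The VecScatter that
   gathers the needed ghost values of x into a->lvec is overlapped with the local multiply,
   which is why VecScatterBegin() is issued before (*a->A->ops->mult)() and VecScatterEnd()
   only afterwards.
*/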
#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)Amat->data, *Bij;
  Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS Me,Notme;
  PetscErrorCode ierr;
  PetscInt M,N,first,last,*notme,i;
  PetscMPIInt size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
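/*
   Note added for clarity: the binary viewer below writes the matrix in the PETSc binary
   format that MatLoad() reads back: a 4-entry header (MAT_FILE_COOKIE, global rows M,
   global columns N, total number of nonzeros), then all row lengths, then all column
   indices, then all numerical values. Process 0 gathers the pieces from the other
   processes one at a time, so it only ever needs one extra buffer of the largest local
   size.
*/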
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
  int fd;
  PetscInt nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt rank = aij->rank,size = aij->size;
  PetscTruth isdraw,iascii,isbinary;
  PetscViewer sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat A;
    Mat_SeqAIJ *Aloc;
    PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar *a;

    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject)mat)->prefix,"-mat_ascii_output_large",&flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
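/*
   Note added for clarity: the parallel "SOR" in MatRelax_MPIAIJ() below is block Jacobi with
   a local SOR sweep. Each iteration scatters the current solution's ghost values into
   mat->lvec, forms the locally corrected right hand side bb1 = bb - B*x_ghost (implemented
   as VecScale(lvec,-1.0) followed by a MatMultAdd with the off-diagonal block B), and then
   applies the requested forward/backward/symmetric sweep to the local diagonal block only.
*/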
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec bb1 = 0;
  PetscTruth hasop;

  PetscFunctionBegin;
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->relax)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,-1.0,bb);CHKERRQ(ierr);
    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm comm,pcomm;
  PetscInt first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt size;
  IS crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory; isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory; isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
PETSC_TRUE; 1469 1470 if (!mat->rowvalues && (idx || v)) { 1471 /* 1472 allocate enough space to hold information from the longest row. 1473 */ 1474 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1475 PetscInt max = 1,tmp; 1476 for (i=0; i<matin->rmap->n; i++) { 1477 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1478 if (max < tmp) { max = tmp; } 1479 } 1480 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1481 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1482 } 1483 1484 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1485 lrow = row - rstart; 1486 1487 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1488 if (!v) {pvA = 0; pvB = 0;} 1489 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1490 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1491 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1492 nztot = nzA + nzB; 1493 1494 cmap = mat->garray; 1495 if (v || idx) { 1496 if (nztot) { 1497 /* Sort by increasing column numbers, assuming A and B already sorted */ 1498 PetscInt imark = -1; 1499 if (v) { 1500 *v = v_p = mat->rowvalues; 1501 for (i=0; i<nzB; i++) { 1502 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1503 else break; 1504 } 1505 imark = i; 1506 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1507 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1508 } 1509 if (idx) { 1510 *idx = idx_p = mat->rowindices; 1511 if (imark > -1) { 1512 for (i=0; i<imark; i++) { 1513 idx_p[i] = cmap[cworkB[i]]; 1514 } 1515 } else { 1516 for (i=0; i<nzB; i++) { 1517 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1518 else break; 1519 } 1520 imark = i; 1521 } 1522 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1523 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1524 } 1525 } else { 1526 if (idx) *idx = 0; 1527 if (v) *v = 0; 1528 } 1529 } 1530 *nz = nztot; 1531 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1532 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1533 PetscFunctionReturn(0); 1534 } 1535 1536 #undef __FUNCT__ 1537 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1538 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1539 { 1540 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1541 1542 PetscFunctionBegin; 1543 if (!aij->getrowactive) { 1544 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1545 } 1546 aij->getrowactive = PETSC_FALSE; 1547 PetscFunctionReturn(0); 1548 } 1549 1550 #undef __FUNCT__ 1551 #define __FUNCT__ "MatNorm_MPIAIJ" 1552 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1553 { 1554 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1555 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1556 PetscErrorCode ierr; 1557 PetscInt i,j,cstart = mat->cmap->rstart; 1558 PetscReal sum = 0.0; 1559 MatScalar *v; 1560 1561 PetscFunctionBegin; 1562 if (aij->size == 1) { 1563 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1564 } else { 1565 if (type == NORM_FROBENIUS) { 1566 v = amat->a; 1567 for (i=0; i<amat->nz; i++) { 1568 #if defined(PETSC_USE_COMPLEX) 1569 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1570 #else 1571 sum += (*v)*(*v); v++; 1572 #endif 1573 } 1574 v = bmat->a; 1575 for (i=0; i<bmat->nz; i++) { 1576 #if defined(PETSC_USE_COMPLEX) 1577 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1578 #else 1579 sum += 
(*v)*(*v); v++; 1580 #endif 1581 } 1582 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1583 *norm = sqrt(*norm); 1584 } else if (type == NORM_1) { /* max column norm */ 1585 PetscReal *tmp,*tmp2; 1586 PetscInt *jj,*garray = aij->garray; 1587 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1588 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1589 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1590 *norm = 0.0; 1591 v = amat->a; jj = amat->j; 1592 for (j=0; j<amat->nz; j++) { 1593 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1594 } 1595 v = bmat->a; jj = bmat->j; 1596 for (j=0; j<bmat->nz; j++) { 1597 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1598 } 1599 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1600 for (j=0; j<mat->cmap->N; j++) { 1601 if (tmp2[j] > *norm) *norm = tmp2[j]; 1602 } 1603 ierr = PetscFree(tmp);CHKERRQ(ierr); 1604 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1605 } else if (type == NORM_INFINITY) { /* max row norm */ 1606 PetscReal ntemp = 0.0; 1607 for (j=0; j<aij->A->rmap->n; j++) { 1608 v = amat->a + amat->i[j]; 1609 sum = 0.0; 1610 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1611 sum += PetscAbsScalar(*v); v++; 1612 } 1613 v = bmat->a + bmat->i[j]; 1614 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1615 sum += PetscAbsScalar(*v); v++; 1616 } 1617 if (sum > ntemp) ntemp = sum; 1618 } 1619 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1620 } else { 1621 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1622 } 1623 } 1624 PetscFunctionReturn(0); 1625 } 1626 1627 #undef __FUNCT__ 1628 #define __FUNCT__ "MatTranspose_MPIAIJ" 1629 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1630 { 1631 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1632 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1633 PetscErrorCode ierr; 1634 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1635 PetscInt cstart=A->cmap->rstart,ncol; 1636 Mat B; 1637 MatScalar *array; 1638 1639 PetscFunctionBegin; 1640 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1641 1642 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1643 ai = Aloc->i; aj = Aloc->j; 1644 bi = Bloc->i; bj = Bloc->j; 1645 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1646 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1647 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1648 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1649 for (i=0; i<ai[ma]; i++){ 1650 d_nnz[aj[i]] ++; 1651 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1652 } 1653 1654 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1655 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1656 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1657 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1658 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1659 } else { 1660 B = *matout; 1661 } 1662 1663 /* copy over the A part */ 1664 array = Aloc->a; 1665 row = A->rmap->rstart; 1666 for (i=0; i<ma; i++) { 1667 ncol = ai[i+1]-ai[i]; 1668 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1669 row++; array += ncol; aj += ncol; 1670 } 1671 aj = 
Aloc->j; 1672 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1673 1674 /* copy over the B part */ 1675 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1676 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1677 array = Bloc->a; 1678 row = A->rmap->rstart; 1679 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1680 cols_tmp = cols; 1681 for (i=0; i<mb; i++) { 1682 ncol = bi[i+1]-bi[i]; 1683 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1684 row++; array += ncol; cols_tmp += ncol; 1685 } 1686 ierr = PetscFree(cols);CHKERRQ(ierr); 1687 1688 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1689 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1690 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1691 *matout = B; 1692 } else { 1693 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1694 } 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1700 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1701 { 1702 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1703 Mat a = aij->A,b = aij->B; 1704 PetscErrorCode ierr; 1705 PetscInt s1,s2,s3; 1706 1707 PetscFunctionBegin; 1708 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1709 if (rr) { 1710 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1711 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1712 /* Overlap communication with computation. */ 1713 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1714 } 1715 if (ll) { 1716 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1717 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1718 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1719 } 1720 /* scale the diagonal block */ 1721 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1722 1723 if (rr) { 1724 /* Do a scatter end and then right scale the off-diagonal block */ 1725 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1726 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1727 } 1728 1729 PetscFunctionReturn(0); 1730 } 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1734 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1735 { 1736 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1737 PetscErrorCode ierr; 1738 1739 PetscFunctionBegin; 1740 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1741 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1742 PetscFunctionReturn(0); 1743 } 1744 #undef __FUNCT__ 1745 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1746 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1747 { 1748 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1749 PetscErrorCode ierr; 1750 1751 PetscFunctionBegin; 1752 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1753 PetscFunctionReturn(0); 1754 } 1755 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatEqual_MPIAIJ" 1758 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1759 { 1760 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1761 Mat a,b,c,d; 1762 PetscTruth flg; 1763 PetscErrorCode ierr; 1764 1765 PetscFunctionBegin; 1766 a = matA->A; b = matA->B; 1767 c = matB->A; d = matB->B; 1768 1769 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1770 if (flg) { 1771 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1772 } 1773 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1774 PetscFunctionReturn(0); 1775 } 
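/*
   Note on the routines above and below: an MPIAIJ matrix stores its local rows as two sequential AIJ
   matrices, the "diagonal" part A (columns owned by this process) and the "off-diagonal" part B (all
   other columns, compressed through garray). Most point-wise operations therefore follow the same
   pattern: apply the operation to both sequential parts and, when a single answer is needed, combine
   the per-process results with an MPI reduction. A minimal sketch of that pattern (illustrative only;
   MatSomePointwiseOp is a hypothetical operation, not a routine in this file):

     PetscErrorCode MatSomePointwiseOp_MPIAIJ(Mat mat)
     {
       Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
       PetscErrorCode ierr;

       PetscFunctionBegin;
       ierr = MatSomePointwiseOp(aij->A);CHKERRQ(ierr);   /- local diagonal block -/
       ierr = MatSomePointwiseOp(aij->B);CHKERRQ(ierr);   /- local off-diagonal block -/
       /- reduce with MPI_Allreduce() here if the operation produces a scalar result -/
       PetscFunctionReturn(0);
     }
*/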
1776 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatCopy_MPIAIJ" 1779 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1780 { 1781 PetscErrorCode ierr; 1782 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1783 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1784 1785 PetscFunctionBegin; 1786 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1787 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1788 /* because of the column compression in the off-processor part of the matrix a->B, 1789 the number of columns in a->B and b->B may be different, hence we cannot call 1790 the MatCopy() directly on the two parts. If need be, we can provide a more 1791 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1792 then copying the submatrices */ 1793 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1794 } else { 1795 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1796 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1797 } 1798 PetscFunctionReturn(0); 1799 } 1800 1801 #undef __FUNCT__ 1802 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1803 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1804 { 1805 PetscErrorCode ierr; 1806 1807 PetscFunctionBegin; 1808 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1809 PetscFunctionReturn(0); 1810 } 1811 1812 #include "petscblaslapack.h" 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatAXPY_MPIAIJ" 1815 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1816 { 1817 PetscErrorCode ierr; 1818 PetscInt i; 1819 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1820 PetscBLASInt bnz,one=1; 1821 Mat_SeqAIJ *x,*y; 1822 1823 PetscFunctionBegin; 1824 if (str == SAME_NONZERO_PATTERN) { 1825 PetscScalar alpha = a; 1826 x = (Mat_SeqAIJ *)xx->A->data; 1827 y = (Mat_SeqAIJ *)yy->A->data; 1828 bnz = PetscBLASIntCast(x->nz); 1829 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1830 x = (Mat_SeqAIJ *)xx->B->data; 1831 y = (Mat_SeqAIJ *)yy->B->data; 1832 bnz = PetscBLASIntCast(x->nz); 1833 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1834 } else if (str == SUBSET_NONZERO_PATTERN) { 1835 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1836 1837 x = (Mat_SeqAIJ *)xx->B->data; 1838 y = (Mat_SeqAIJ *)yy->B->data; 1839 if (y->xtoy && y->XtoY != xx->B) { 1840 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1841 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1842 } 1843 if (!y->xtoy) { /* get xtoy */ 1844 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1845 y->XtoY = xx->B; 1846 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1847 } 1848 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1849 } else { 1850 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1851 } 1852 PetscFunctionReturn(0); 1853 } 1854 1855 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatConjugate_MPIAIJ" 1859 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1860 { 1861 #if defined(PETSC_USE_COMPLEX) 1862 PetscErrorCode ierr; 1863 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1864 1865 PetscFunctionBegin; 1866 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1867 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1868 #else 1869 PetscFunctionBegin; 1870 #endif 1871 PetscFunctionReturn(0); 1872 } 1873 1874 #undef __FUNCT__ 1875 #define __FUNCT__ "MatRealPart_MPIAIJ" 1876 PetscErrorCode 
MatRealPart_MPIAIJ(Mat A) 1877 { 1878 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1879 PetscErrorCode ierr; 1880 1881 PetscFunctionBegin; 1882 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1883 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1884 PetscFunctionReturn(0); 1885 } 1886 1887 #undef __FUNCT__ 1888 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1889 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1890 { 1891 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1892 PetscErrorCode ierr; 1893 1894 PetscFunctionBegin; 1895 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1896 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1897 PetscFunctionReturn(0); 1898 } 1899 1900 #ifdef PETSC_HAVE_PBGL 1901 1902 #include <boost/parallel/mpi/bsp_process_group.hpp> 1903 #include <boost/graph/distributed/ilu_default_graph.hpp> 1904 #include <boost/graph/distributed/ilu_0_block.hpp> 1905 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1906 #include <boost/graph/distributed/petsc/interface.hpp> 1907 #include <boost/multi_array.hpp> 1908 #include <boost/parallel/distributed_property_map.hpp> 1909 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1912 /* 1913 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1914 */ 1915 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1916 { 1917 namespace petsc = boost::distributed::petsc; 1918 1919 namespace graph_dist = boost::graph::distributed; 1920 using boost::graph::distributed::ilu_default::process_group_type; 1921 using boost::graph::ilu_permuted; 1922 1923 PetscTruth row_identity, col_identity; 1924 PetscContainer c; 1925 PetscInt m, n, M, N; 1926 PetscErrorCode ierr; 1927 1928 PetscFunctionBegin; 1929 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1930 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1931 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1932 if (!row_identity || !col_identity) { 1933 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1934 } 1935 1936 process_group_type pg; 1937 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1938 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1939 lgraph_type& level_graph = *lgraph_p; 1940 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1941 1942 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1943 ilu_permuted(level_graph); 1944 1945 /* put together the new matrix */ 1946 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1947 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1948 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1949 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1950 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1951 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1952 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1953 1954 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 1955 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 1956 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1962 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1963 { 1964 PetscFunctionBegin; 1965 PetscFunctionReturn(0); 1966 } 1967 1968 #undef __FUNCT__ 1969 #define __FUNCT__
"MatSolve_MPIAIJ" 1970 /* 1971 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1972 */ 1973 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1974 { 1975 namespace graph_dist = boost::graph::distributed; 1976 1977 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1978 lgraph_type* lgraph_p; 1979 PetscContainer c; 1980 PetscErrorCode ierr; 1981 1982 PetscFunctionBegin; 1983 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1984 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1985 ierr = VecCopy(b, x);CHKERRQ(ierr); 1986 1987 PetscScalar* array_x; 1988 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1989 PetscInt sx; 1990 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 1991 1992 PetscScalar* array_b; 1993 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 1994 PetscInt sb; 1995 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 1996 1997 lgraph_type& level_graph = *lgraph_p; 1998 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1999 2000 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2001 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2002 ref_x(array_x, boost::extents[num_vertices(graph)]); 2003 2004 typedef boost::iterator_property_map<array_ref_type::iterator, 2005 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2006 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2007 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2008 2009 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2010 2011 PetscFunctionReturn(0); 2012 } 2013 #endif 2014 2015 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2016 PetscInt nzlocal,nsends,nrecvs; 2017 PetscMPIInt *send_rank; 2018 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 2019 PetscScalar *sbuf_a,**rbuf_a; 2020 PetscErrorCode (*MatDestroy)(Mat); 2021 } Mat_Redundant; 2022 2023 #undef __FUNCT__ 2024 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2025 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2026 { 2027 PetscErrorCode ierr; 2028 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2029 PetscInt i; 2030 2031 PetscFunctionBegin; 2032 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 2033 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2034 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2035 for (i=0; i<redund->nrecvs; i++){ 2036 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2037 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2038 } 2039 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2040 ierr = PetscFree(redund);CHKERRQ(ierr); 2041 PetscFunctionReturn(0); 2042 } 2043 2044 #undef __FUNCT__ 2045 #define __FUNCT__ "MatDestroy_MatRedundant" 2046 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2047 { 2048 PetscErrorCode ierr; 2049 PetscContainer container; 2050 Mat_Redundant *redund=PETSC_NULL; 2051 2052 PetscFunctionBegin; 2053 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2054 if (container) { 2055 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2056 } else { 2057 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2058 } 2059 A->ops->destroy = redund->MatDestroy; 2060 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2061 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2062 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2063 PetscFunctionReturn(0); 2064 } 2065 
2066 #undef __FUNCT__ 2067 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2068 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2069 { 2070 PetscMPIInt rank,size; 2071 MPI_Comm comm=((PetscObject)mat)->comm; 2072 PetscErrorCode ierr; 2073 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2074 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2075 PetscInt *rowrange=mat->rmap->range; 2076 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2077 Mat A=aij->A,B=aij->B,C=*matredundant; 2078 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2079 PetscScalar *sbuf_a; 2080 PetscInt nzlocal=a->nz+b->nz; 2081 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2082 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2083 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2084 MatScalar *aworkA,*aworkB; 2085 PetscScalar *vals; 2086 PetscMPIInt tag1,tag2,tag3,imdex; 2087 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2088 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2089 MPI_Status recv_status,*send_status; 2090 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2091 PetscInt **rbuf_j=PETSC_NULL; 2092 PetscScalar **rbuf_a=PETSC_NULL; 2093 Mat_Redundant *redund=PETSC_NULL; 2094 PetscContainer container; 2095 2096 PetscFunctionBegin; 2097 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2098 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2099 2100 if (reuse == MAT_REUSE_MATRIX) { 2101 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2102 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2103 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2104 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2105 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2106 if (container) { 2107 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2108 } else { 2109 SETERRQ(PETSC_ERR_PLIB,"Container does not exist"); 2110 } 2111 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2112 2113 nsends = redund->nsends; 2114 nrecvs = redund->nrecvs; 2115 send_rank = redund->send_rank; recv_rank = send_rank + size; 2116 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2117 sbuf_j = redund->sbuf_j; 2118 sbuf_a = redund->sbuf_a; 2119 rbuf_j = redund->rbuf_j; 2120 rbuf_a = redund->rbuf_a; 2121 } 2122 2123 if (reuse == MAT_INITIAL_MATRIX){ 2124 PetscMPIInt subrank,subsize; 2125 PetscInt nleftover,np_subcomm; 2126 /* get the destination processors' id send_rank, nsends and nrecvs */ 2127 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2128 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2129 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr); 2130 recv_rank = send_rank + size; 2131 np_subcomm = size/nsubcomm; 2132 nleftover = size - nsubcomm*np_subcomm; 2133 nsends = 0; nrecvs = 0; 2134 for (i=0; i<size; i++){ /* i=rank*/ 2135 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2136 send_rank[nsends] = i; nsends++; 2137 recv_rank[nrecvs++] = i; 2138 } 2139 } 2140 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2141 i = size-nleftover-1; 2142 j = 0; 2143 while (j < nsubcomm - nleftover){ 2144 send_rank[nsends++] = i; 2145 i--; j++; 2146 } 2147 } 2148 2149 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2150 for (i=0; i<nleftover; i++){ 2151 recv_rank[nrecvs++] = size-nleftover+i; 2152 } 2153 } 2154 2155 /* allocate sbuf_j, sbuf_a */ 2156 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2157 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2158 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2159 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2160 2161 /* copy mat's local entries into the buffers */ 2162 if (reuse == MAT_INITIAL_MATRIX){ 2163 rownz_max = 0; 2164 rptr = sbuf_j; 2165 cols = sbuf_j + rend-rstart + 1; 2166 vals = sbuf_a; 2167 rptr[0] = 0; 2168 for (i=0; i<rend-rstart; i++){ 2169 row = i + rstart; 2170 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2171 ncols = nzA + nzB; 2172 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2173 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2174 /* load the column indices for this row into cols */ 2175 lwrite = 0; 2176 for (l=0; l<nzB; l++) { 2177 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2178 vals[lwrite] = aworkB[l]; 2179 cols[lwrite++] = ctmp; 2180 } 2181 } 2182 for (l=0; l<nzA; l++){ 2183 vals[lwrite] = aworkA[l]; 2184 cols[lwrite++] = cstart + cworkA[l]; 2185 } 2186 for (l=0; l<nzB; l++) { 2187 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2188 vals[lwrite] = aworkB[l]; 2189 cols[lwrite++] = ctmp; 2190 } 2191 } 2192 vals += ncols; 2193 cols += ncols; 2194 rptr[i+1] = rptr[i] + ncols; 2195 if (rownz_max < ncols) rownz_max = ncols; 2196 } 2197 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz); 2198 } else { /* only copy matrix values into sbuf_a */ 2199 rptr = sbuf_j; 2200 vals = sbuf_a; 2201 rptr[0] = 0; 2202 for (i=0; i<rend-rstart; i++){ 2203 row = i + rstart; 2204 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2205 ncols = nzA + nzB; 2206 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2207 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2208 lwrite = 0; 2209 for (l=0; l<nzB; l++) { 2210 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2211 } 2212 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2213 for (l=0; l<nzB; l++) {
2214 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2215 } 2216 vals += ncols; 2217 rptr[i+1] = rptr[i] + ncols; 2218 } 2219 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2220 2221 /* send nzlocal to others, and recv other's nzlocal */ 2222 /*--------------------------------------------------*/ 2223 if (reuse == MAT_INITIAL_MATRIX){ 2224 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2225 s_waits2 = s_waits3 + nsends; 2226 s_waits1 = s_waits2 + nsends; 2227 r_waits1 = s_waits1 + nsends; 2228 r_waits2 = r_waits1 + nrecvs; 2229 r_waits3 = r_waits2 + nrecvs; 2230 } else { 2231 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2232 r_waits3 = s_waits3 + nsends; 2233 } 2234 2235 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2236 if (reuse == MAT_INITIAL_MATRIX){ 2237 /* get new tags to keep the communication clean */ 2238 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2239 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2240 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2241 rbuf_nz = sbuf_nz + nsends; 2242 2243 /* post receives of other's nzlocal */ 2244 for (i=0; i<nrecvs; i++){ 2245 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2246 } 2247 /* send nzlocal to others */ 2248 for (i=0; i<nsends; i++){ 2249 sbuf_nz[i] = nzlocal; 2250 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2251 } 2252 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2253 count = nrecvs; 2254 while (count) { 2255 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2256 recv_rank[imdex] = recv_status.MPI_SOURCE; 2257 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2258 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2259 2260 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2261 rbuf_nz[imdex] += i + 2; 2262 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2263 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2264 count--; 2265 } 2266 /* wait on sends of nzlocal */ 2267 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2268 /* send mat->i,j to others, and recv from other's */ 2269 /*------------------------------------------------*/ 2270 for (i=0; i<nsends; i++){ 2271 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2272 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2273 } 2274 /* wait on receives of mat->i,j */ 2275 /*------------------------------*/ 2276 count = nrecvs; 2277 while (count) { 2278 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2279 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2280 count--; 2281 } 2282 /* wait on sends of mat->i,j */ 2283 /*---------------------------*/ 2284 if (nsends) { 2285 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2286 } 2287 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2288 2289 /* post receives, send and receive mat->a */ 2290 /*----------------------------------------*/ 
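/* The exchange below follows the same nonblocking pattern used above for the row pointers and
   column indices: post one MPI_Irecv() per expected sender, start one MPI_Isend() of sbuf_a per
   destination in send_rank[], drain the receives with MPI_Waitany() while checking that the
   source matches recv_rank[], and finally MPI_Waitall() on the sends before the send buffer is
   reused or freed. */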
2291 for (imdex=0; imdex<nrecvs; imdex++) { 2292 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2293 } 2294 for (i=0; i<nsends; i++){ 2295 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2296 } 2297 count = nrecvs; 2298 while (count) { 2299 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2300 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2301 count--; 2302 } 2303 if (nsends) { 2304 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2305 } 2306 2307 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2308 2309 /* create redundant matrix */ 2310 /*-------------------------*/ 2311 if (reuse == MAT_INITIAL_MATRIX){ 2312 /* compute rownz_max for preallocation */ 2313 for (imdex=0; imdex<nrecvs; imdex++){ 2314 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2315 rptr = rbuf_j[imdex]; 2316 for (i=0; i<j; i++){ 2317 ncols = rptr[i+1] - rptr[i]; 2318 if (rownz_max < ncols) rownz_max = ncols; 2319 } 2320 } 2321 2322 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2323 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2324 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2325 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2326 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2327 } else { 2328 C = *matredundant; 2329 } 2330 2331 /* insert local matrix entries */ 2332 rptr = sbuf_j; 2333 cols = sbuf_j + rend-rstart + 1; 2334 vals = sbuf_a; 2335 for (i=0; i<rend-rstart; i++){ 2336 row = i + rstart; 2337 ncols = rptr[i+1] - rptr[i]; 2338 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2339 vals += ncols; 2340 cols += ncols; 2341 } 2342 /* insert received matrix entries */ 2343 for (imdex=0; imdex<nrecvs; imdex++){ 2344 rstart = rowrange[recv_rank[imdex]]; 2345 rend = rowrange[recv_rank[imdex]+1]; 2346 rptr = rbuf_j[imdex]; 2347 cols = rbuf_j[imdex] + rend-rstart + 1; 2348 vals = rbuf_a[imdex]; 2349 for (i=0; i<rend-rstart; i++){ 2350 row = i + rstart; 2351 ncols = rptr[i+1] - rptr[i]; 2352 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2353 vals += ncols; 2354 cols += ncols; 2355 } 2356 } 2357 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2358 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2359 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2360 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2361 if (reuse == MAT_INITIAL_MATRIX){ 2362 PetscContainer container; 2363 *matredundant = C; 2364 /* create a supporting struct and attach it to C for reuse */ 2365 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2366 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2367 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2368 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2369 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2370 2371 redund->nzlocal = nzlocal; 2372 redund->nsends = nsends; 2373 redund->nrecvs = nrecvs; 2374 redund->send_rank = send_rank; 2375 redund->sbuf_nz = sbuf_nz; 2376 redund->sbuf_j = sbuf_j; 2377 redund->sbuf_a = sbuf_a; 2378 redund->rbuf_j = 
rbuf_j; 2379 redund->rbuf_a = rbuf_a; 2380 2381 redund->MatDestroy = C->ops->destroy; 2382 C->ops->destroy = MatDestroy_MatRedundant; 2383 } 2384 PetscFunctionReturn(0); 2385 } 2386 2387 #undef __FUNCT__ 2388 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2389 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2390 { 2391 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2392 PetscErrorCode ierr; 2393 PetscInt i,*idxb = 0; 2394 PetscScalar *va,*vb; 2395 Vec vtmp; 2396 2397 PetscFunctionBegin; 2398 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2399 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2400 if (idx) { 2401 for (i=0; i<A->rmap->n; i++) { 2402 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2403 } 2404 } 2405 2406 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2407 if (idx) { 2408 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2409 } 2410 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2411 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2412 2413 for (i=0; i<A->rmap->n; i++){ 2414 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2415 va[i] = vb[i]; 2416 if (idx) idx[i] = a->garray[idxb[i]]; 2417 } 2418 } 2419 2420 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2421 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2422 if (idxb) { 2423 ierr = PetscFree(idxb);CHKERRQ(ierr); 2424 } 2425 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2426 PetscFunctionReturn(0); 2427 } 2428 2429 #undef __FUNCT__ 2430 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2431 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2432 { 2433 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2434 PetscErrorCode ierr; 2435 PetscInt i,*idxb = 0; 2436 PetscScalar *va,*vb; 2437 Vec vtmp; 2438 2439 PetscFunctionBegin; 2440 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2441 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2442 if (idx) { 2443 for (i=0; i<A->cmap->n; i++) { 2444 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2445 } 2446 } 2447 2448 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2449 if (idx) { 2450 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2451 } 2452 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2453 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2454 2455 for (i=0; i<A->rmap->n; i++){ 2456 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2457 va[i] = vb[i]; 2458 if (idx) idx[i] = a->garray[idxb[i]]; 2459 } 2460 } 2461 2462 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2463 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2464 if (idxb) { 2465 ierr = PetscFree(idxb);CHKERRQ(ierr); 2466 } 2467 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2468 PetscFunctionReturn(0); 2469 } 2470 2471 #undef __FUNCT__ 2472 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2473 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2474 { 2475 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2476 PetscInt n = A->rmap->n; 2477 PetscInt cstart = A->cmap->rstart; 2478 PetscInt *cmap = mat->garray; 2479 PetscInt *diagIdx, *offdiagIdx; 2480 Vec diagV, offdiagV; 2481 PetscScalar *a, *diagA, *offdiagA; 2482 PetscInt r; 2483 PetscErrorCode ierr; 2484 2485 PetscFunctionBegin; 2486 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2487 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2488 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2489 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2490 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2491 
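/* Merge the two per-part results row by row: keep whichever of the diagonal-part and off-diagonal-part
   entries has the smaller magnitude, and translate the winning column index back to global numbering --
   diagonal-part indices are offset by cstart, while off-diagonal-part indices go through the garray
   (local-to-global column) map. */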
ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2492 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2493 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2494 for(r = 0; r < n; ++r) { 2495 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2496 a[r] = diagA[r]; 2497 idx[r] = cstart + diagIdx[r]; 2498 } else { 2499 a[r] = offdiagA[r]; 2500 idx[r] = cmap[offdiagIdx[r]]; 2501 } 2502 } 2503 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2504 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2505 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2506 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2507 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2508 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2509 PetscFunctionReturn(0); 2510 } 2511 2512 #undef __FUNCT__ 2513 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2514 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2515 { 2516 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2517 PetscInt n = A->rmap->n; 2518 PetscInt cstart = A->cmap->rstart; 2519 PetscInt *cmap = mat->garray; 2520 PetscInt *diagIdx, *offdiagIdx; 2521 Vec diagV, offdiagV; 2522 PetscScalar *a, *diagA, *offdiagA; 2523 PetscInt r; 2524 PetscErrorCode ierr; 2525 2526 PetscFunctionBegin; 2527 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2528 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2529 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2530 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2531 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2532 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2533 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2534 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2535 for(r = 0; r < n; ++r) { 2536 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2537 a[r] = diagA[r]; 2538 idx[r] = cstart + diagIdx[r]; 2539 } else { 2540 a[r] = offdiagA[r]; 2541 idx[r] = cmap[offdiagIdx[r]]; 2542 } 2543 } 2544 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2545 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2546 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2547 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2548 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2549 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2550 PetscFunctionReturn(0); 2551 } 2552 2553 #undef __FUNCT__ 2554 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2555 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2556 { 2557 PetscErrorCode ierr; 2558 Mat *dummy; 2559 2560 PetscFunctionBegin; 2561 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2562 *newmat = *dummy; 2563 ierr = PetscFree(dummy);CHKERRQ(ierr); 2564 PetscFunctionReturn(0); 2565 } 2566 2567 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2568 /* -------------------------------------------------------------------*/ 2569 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2570 MatGetRow_MPIAIJ, 2571 MatRestoreRow_MPIAIJ, 2572 MatMult_MPIAIJ, 2573 /* 4*/ MatMultAdd_MPIAIJ, 2574 MatMultTranspose_MPIAIJ, 2575 MatMultTransposeAdd_MPIAIJ, 2576 #ifdef PETSC_HAVE_PBGL 2577 MatSolve_MPIAIJ, 2578 #else 2579 0, 2580 #endif 2581 0, 2582 0, 2583 /*10*/ 0, 2584 0, 2585 0, 2586 MatRelax_MPIAIJ, 2587 MatTranspose_MPIAIJ, 2588 /*15*/ MatGetInfo_MPIAIJ, 2589 MatEqual_MPIAIJ, 2590 MatGetDiagonal_MPIAIJ, 2591 MatDiagonalScale_MPIAIJ, 2592 MatNorm_MPIAIJ, 
2593 /*20*/ MatAssemblyBegin_MPIAIJ, 2594 MatAssemblyEnd_MPIAIJ, 2595 MatSetOption_MPIAIJ, 2596 MatZeroEntries_MPIAIJ, 2597 /*24*/ MatZeroRows_MPIAIJ, 2598 0, 2599 #ifdef PETSC_HAVE_PBGL 2600 0, 2601 #else 2602 0, 2603 #endif 2604 0, 2605 0, 2606 /*29*/ MatSetUpPreallocation_MPIAIJ, 2607 #ifdef PETSC_HAVE_PBGL 2608 0, 2609 #else 2610 0, 2611 #endif 2612 0, 2613 0, 2614 0, 2615 /*34*/ MatDuplicate_MPIAIJ, 2616 0, 2617 0, 2618 0, 2619 0, 2620 /*39*/ MatAXPY_MPIAIJ, 2621 MatGetSubMatrices_MPIAIJ, 2622 MatIncreaseOverlap_MPIAIJ, 2623 MatGetValues_MPIAIJ, 2624 MatCopy_MPIAIJ, 2625 /*44*/ MatGetRowMax_MPIAIJ, 2626 MatScale_MPIAIJ, 2627 0, 2628 0, 2629 0, 2630 /*49*/ MatSetBlockSize_MPIAIJ, 2631 0, 2632 0, 2633 0, 2634 0, 2635 /*54*/ MatFDColoringCreate_MPIAIJ, 2636 0, 2637 MatSetUnfactored_MPIAIJ, 2638 MatPermute_MPIAIJ, 2639 0, 2640 /*59*/ MatGetSubMatrix_MPIAIJ, 2641 MatDestroy_MPIAIJ, 2642 MatView_MPIAIJ, 2643 0, 2644 0, 2645 /*64*/ 0, 2646 0, 2647 0, 2648 0, 2649 0, 2650 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2651 MatGetRowMinAbs_MPIAIJ, 2652 0, 2653 MatSetColoring_MPIAIJ, 2654 #if defined(PETSC_HAVE_ADIC) 2655 MatSetValuesAdic_MPIAIJ, 2656 #else 2657 0, 2658 #endif 2659 MatSetValuesAdifor_MPIAIJ, 2660 /*75*/ MatFDColoringApply_AIJ, 2661 0, 2662 0, 2663 0, 2664 0, 2665 /*80*/ 0, 2666 0, 2667 0, 2668 /*83*/ MatLoad_MPIAIJ, 2669 0, 2670 0, 2671 0, 2672 0, 2673 0, 2674 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2675 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2676 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2677 MatPtAP_Basic, 2678 MatPtAPSymbolic_MPIAIJ, 2679 /*94*/ MatPtAPNumeric_MPIAIJ, 2680 0, 2681 0, 2682 0, 2683 0, 2684 /*99*/ 0, 2685 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2686 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2687 MatConjugate_MPIAIJ, 2688 0, 2689 /*104*/MatSetValuesRow_MPIAIJ, 2690 MatRealPart_MPIAIJ, 2691 MatImaginaryPart_MPIAIJ, 2692 0, 2693 0, 2694 /*109*/0, 2695 MatGetRedundantMatrix_MPIAIJ, 2696 MatGetRowMin_MPIAIJ, 2697 0, 2698 0, 2699 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2700 0, 2701 0, 2702 0, 2703 0, 2704 0 2705 }; 2706 2707 /* ----------------------------------------------------------------------------------------*/ 2708 2709 EXTERN_C_BEGIN 2710 #undef __FUNCT__ 2711 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2712 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2713 { 2714 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2715 PetscErrorCode ierr; 2716 2717 PetscFunctionBegin; 2718 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2719 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2720 PetscFunctionReturn(0); 2721 } 2722 EXTERN_C_END 2723 2724 EXTERN_C_BEGIN 2725 #undef __FUNCT__ 2726 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2727 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2728 { 2729 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2730 PetscErrorCode ierr; 2731 2732 PetscFunctionBegin; 2733 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2734 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2735 PetscFunctionReturn(0); 2736 } 2737 EXTERN_C_END 2738 2739 #include "petscpc.h" 2740 EXTERN_C_BEGIN 2741 #undef __FUNCT__ 2742 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2743 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2744 { 2745 Mat_MPIAIJ *b; 2746 PetscErrorCode ierr; 2747 PetscInt i; 2748 2749 PetscFunctionBegin; 2750 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2751 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2752 if (d_nz < 0) 
SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2753 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2754 2755 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2756 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2757 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2758 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2759 if (d_nnz) { 2760 for (i=0; i<B->rmap->n; i++) { 2761 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2762 } 2763 } 2764 if (o_nnz) { 2765 for (i=0; i<B->rmap->n; i++) { 2766 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2767 } 2768 } 2769 b = (Mat_MPIAIJ*)B->data; 2770 2771 if (!B->preallocated) { 2772 /* Explicitly create 2 MATSEQAIJ matrices. */ 2773 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2774 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2775 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2776 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2777 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2778 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2779 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2780 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2781 } 2782 2783 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2784 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2785 B->preallocated = PETSC_TRUE; 2786 PetscFunctionReturn(0); 2787 } 2788 EXTERN_C_END 2789 2790 #undef __FUNCT__ 2791 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2792 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2793 { 2794 Mat mat; 2795 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2796 PetscErrorCode ierr; 2797 2798 PetscFunctionBegin; 2799 *newmat = 0; 2800 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2801 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2802 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2803 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2804 a = (Mat_MPIAIJ*)mat->data; 2805 2806 mat->factor = matin->factor; 2807 mat->rmap->bs = matin->rmap->bs; 2808 mat->assembled = PETSC_TRUE; 2809 mat->insertmode = NOT_SET_VALUES; 2810 mat->preallocated = PETSC_TRUE; 2811 2812 a->size = oldmat->size; 2813 a->rank = oldmat->rank; 2814 a->donotstash = oldmat->donotstash; 2815 a->roworiented = oldmat->roworiented; 2816 a->rowindices = 0; 2817 a->rowvalues = 0; 2818 a->getrowactive = PETSC_FALSE; 2819 2820 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2821 ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2822 2823 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2824 if (oldmat->colmap) { 2825 #if defined (PETSC_USE_CTABLE) 2826 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2827 #else 2828 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2829 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2830 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2831 #endif 2832 } else a->colmap = 0; 2833 if (oldmat->garray) { 2834 PetscInt len; 2835 len = 
oldmat->B->cmap->n; 2836 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2837 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2838 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2839 } else a->garray = 0; 2840 2841 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2842 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2843 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2844 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2845 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2846 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2847 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2848 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2849 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2850 *newmat = mat; 2851 PetscFunctionReturn(0); 2852 } 2853 2854 #include "petscsys.h" 2855 2856 #undef __FUNCT__ 2857 #define __FUNCT__ "MatLoad_MPIAIJ" 2858 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2859 { 2860 Mat A; 2861 PetscScalar *vals,*svals; 2862 MPI_Comm comm = ((PetscObject)viewer)->comm; 2863 MPI_Status status; 2864 PetscErrorCode ierr; 2865 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2866 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2867 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2868 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2869 PetscInt cend,cstart,n,*rowners; 2870 int fd; 2871 2872 PetscFunctionBegin; 2873 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2874 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2875 if (!rank) { 2876 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2877 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2878 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2879 } 2880 2881 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2882 M = header[1]; N = header[2]; 2883 /* determine ownership of all rows */ 2884 m = M/size + ((M % size) > rank); 2885 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2886 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2887 2888 /* First process needs enough room for process with most rows */ 2889 if (!rank) { 2890 mmax = rowners[1]; 2891 for (i=2; i<size; i++) { 2892 mmax = PetscMax(mmax,rowners[i]); 2893 } 2894 } else mmax = m; 2895 2896 rowners[0] = 0; 2897 for (i=2; i<=size; i++) { 2898 rowners[i] += rowners[i-1]; 2899 } 2900 rstart = rowners[rank]; 2901 rend = rowners[rank+1]; 2902 2903 /* distribute row lengths to all processors */ 2904 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2905 if (!rank) { 2906 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2907 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2908 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2909 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2910 for (j=0; j<m; j++) { 2911 procsnz[0] += ourlens[j]; 2912 } 2913 for (i=1; i<size; i++) { 2914 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2915 /* calculate the number of nonzeros on each processor */ 2916 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2917 procsnz[i] += rowlengths[j]; 2918 } 2919 mpicnt = 
PetscMPIIntCast(rowners[i+1]-rowners[i]); 2920 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2921 } 2922 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2923 } else { 2924 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2925 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2926 } 2927 2928 if (!rank) { 2929 /* determine max buffer needed and allocate it */ 2930 maxnz = 0; 2931 for (i=0; i<size; i++) { 2932 maxnz = PetscMax(maxnz,procsnz[i]); 2933 } 2934 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2935 2936 /* read in my part of the matrix column indices */ 2937 nz = procsnz[0]; 2938 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2939 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2940 2941 /* read in every one elses and ship off */ 2942 for (i=1; i<size; i++) { 2943 nz = procsnz[i]; 2944 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2945 mpicnt = PetscMPIIntCast(nz); 2946 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2947 } 2948 ierr = PetscFree(cols);CHKERRQ(ierr); 2949 } else { 2950 /* determine buffer space needed for message */ 2951 nz = 0; 2952 for (i=0; i<m; i++) { 2953 nz += ourlens[i]; 2954 } 2955 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2956 2957 /* receive message of column indices*/ 2958 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2959 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2960 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2961 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2962 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2963 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2964 } 2965 2966 /* determine column ownership if matrix is not square */ 2967 if (N != M) { 2968 n = N/size + ((N % size) > rank); 2969 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2970 cstart = cend - n; 2971 } else { 2972 cstart = rstart; 2973 cend = rend; 2974 n = cend - cstart; 2975 } 2976 2977 /* loop over local rows, determining number of off diagonal entries */ 2978 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2979 jj = 0; 2980 for (i=0; i<m; i++) { 2981 for (j=0; j<ourlens[i]; j++) { 2982 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2983 jj++; 2984 } 2985 } 2986 2987 /* create our matrix */ 2988 for (i=0; i<m; i++) { 2989 ourlens[i] -= offlens[i]; 2990 } 2991 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2992 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2993 ierr = MatSetType(A,type);CHKERRQ(ierr); 2994 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2995 2996 for (i=0; i<m; i++) { 2997 ourlens[i] += offlens[i]; 2998 } 2999 3000 if (!rank) { 3001 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3002 3003 /* read in my part of the matrix numerical values */ 3004 nz = procsnz[0]; 3005 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3006 3007 /* insert into matrix */ 3008 jj = rstart; 3009 smycols = mycols; 3010 svals = vals; 3011 for (i=0; i<m; i++) { 3012 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3013 smycols += ourlens[i]; 3014 svals += ourlens[i]; 3015 jj++; 3016 } 3017 3018 /* read in other 
processors and ship out */ 3019 for (i=1; i<size; i++) { 3020 nz = procsnz[i]; 3021 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3022 mpicnt = PetscMPIIntCast(nz); 3023 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3024 } 3025 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3026 } else { 3027 /* receive numeric values */ 3028 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3029 3030 /* receive message of values*/ 3031 mpicnt = PetscMPIIntCast(nz); 3032 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3033 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3034 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 3035 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 3036 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3037 3038 /* insert into matrix */ 3039 jj = rstart; 3040 smycols = mycols; 3041 svals = vals; 3042 for (i=0; i<m; i++) { 3043 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3044 smycols += ourlens[i]; 3045 svals += ourlens[i]; 3046 jj++; 3047 } 3048 } 3049 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3050 ierr = PetscFree(vals);CHKERRQ(ierr); 3051 ierr = PetscFree(mycols);CHKERRQ(ierr); 3052 ierr = PetscFree(rowners);CHKERRQ(ierr); 3053 3054 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3055 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3056 *newmat = A; 3057 PetscFunctionReturn(0); 3058 } 3059 3060 #undef __FUNCT__ 3061 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3062 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3063 { 3064 PetscErrorCode ierr; 3065 IS iscol_local; 3066 PetscInt csize; 3067 3068 PetscFunctionBegin; 3069 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3070 if (call == MAT_REUSE_MATRIX) { 3071 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3072 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3073 } else { 3074 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3075 } 3076 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3077 if (call == MAT_INITIAL_MATRIX) { 3078 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3079 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3080 } 3081 PetscFunctionReturn(0); 3082 } 3083 3084 #undef __FUNCT__ 3085 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3086 /* 3087 Not great since it makes two copies of the submatrix, first an SeqAIJ 3088 in local and then by concatenating the local matrices the end result. 3089 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3090 3091 Note: This requires a sequential iscol with all indices. 
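
   Typical call chain (an illustrative sketch; A, isrow and iscol stand for a user's parallel
   matrix and index sets, not objects defined in this file):

     MatGetSubMatrix(A,isrow,iscol,...)       user-level call
       -> MatGetSubMatrix_MPIAIJ()            gathers iscol onto every process with ISAllGather()
                                              and caches it on the result as "ISAllGather" so a
                                              MAT_REUSE_MATRIX call can find it again
       -> MatGetSubMatrix_MPIAIJ_Private()    extracts the local rows with MatGetSubMatrices()
                                              and assembles the parallel result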
3092 */ 3093 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3094 { 3095 PetscErrorCode ierr; 3096 PetscMPIInt rank,size; 3097 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3098 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3099 Mat *local,M,Mreuse; 3100 MatScalar *vwork,*aa; 3101 MPI_Comm comm = ((PetscObject)mat)->comm; 3102 Mat_SeqAIJ *aij; 3103 3104 3105 PetscFunctionBegin; 3106 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3107 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3108 3109 if (call == MAT_REUSE_MATRIX) { 3110 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3111 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3112 local = &Mreuse; 3113 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3114 } else { 3115 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3116 Mreuse = *local; 3117 ierr = PetscFree(local);CHKERRQ(ierr); 3118 } 3119 3120 /* 3121 m - number of local rows 3122 n - number of columns (same on all processors) 3123 rstart - first row in new global matrix generated 3124 */ 3125 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3126 if (call == MAT_INITIAL_MATRIX) { 3127 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3128 ii = aij->i; 3129 jj = aij->j; 3130 3131 /* 3132 Determine the number of non-zeros in the diagonal and off-diagonal 3133 portions of the matrix in order to do correct preallocation 3134 */ 3135 3136 /* first get start and end of "diagonal" columns */ 3137 if (csize == PETSC_DECIDE) { 3138 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3139 if (mglobal == n) { /* square matrix */ 3140 nlocal = m; 3141 } else { 3142 nlocal = n/size + ((n % size) > rank); 3143 } 3144 } else { 3145 nlocal = csize; 3146 } 3147 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3148 rstart = rend - nlocal; 3149 if (rank == size - 1 && rend != n) { 3150 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3151 } 3152 3153 /* next, compute all the lengths */ 3154 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3155 olens = dlens + m; 3156 for (i=0; i<m; i++) { 3157 jend = ii[i+1] - ii[i]; 3158 olen = 0; 3159 dlen = 0; 3160 for (j=0; j<jend; j++) { 3161 if (*jj < rstart || *jj >= rend) olen++; 3162 else dlen++; 3163 jj++; 3164 } 3165 olens[i] = olen; 3166 dlens[i] = dlen; 3167 } 3168 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3169 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3170 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3171 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3172 ierr = PetscFree(dlens);CHKERRQ(ierr); 3173 } else { 3174 PetscInt ml,nl; 3175 3176 M = *newmat; 3177 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3178 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3179 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3180 /* 3181 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3182 rather than the slower MatSetValues(). 
3183 */ 3184 M->was_assembled = PETSC_TRUE; 3185 M->assembled = PETSC_FALSE; 3186 } 3187 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3188 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3189 ii = aij->i; 3190 jj = aij->j; 3191 aa = aij->a; 3192 for (i=0; i<m; i++) { 3193 row = rstart + i; 3194 nz = ii[i+1] - ii[i]; 3195 cwork = jj; jj += nz; 3196 vwork = aa; aa += nz; 3197 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3198 } 3199 3200 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3201 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3202 *newmat = M; 3203 3204 /* save submatrix used in processor for next request */ 3205 if (call == MAT_INITIAL_MATRIX) { 3206 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3207 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3208 } 3209 3210 PetscFunctionReturn(0); 3211 } 3212 3213 EXTERN_C_BEGIN 3214 #undef __FUNCT__ 3215 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3216 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3217 { 3218 PetscInt m,cstart, cend,j,nnz,i,d; 3219 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3220 const PetscInt *JJ; 3221 PetscScalar *values; 3222 PetscErrorCode ierr; 3223 3224 PetscFunctionBegin; 3225 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3226 3227 ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3228 ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3229 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 3230 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 3231 m = B->rmap->n; 3232 cstart = B->cmap->rstart; 3233 cend = B->cmap->rend; 3234 rstart = B->rmap->rstart; 3235 3236 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3237 o_nnz = d_nnz + m; 3238 3239 #if defined(PETSC_USE_DEBUG) 3240 for (i=0; i<m; i++) { 3241 nnz = Ii[i+1]- Ii[i]; 3242 JJ = J + Ii[i]; 3243 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3244 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3245 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3246 for (j=1; j<nnz; j++) { 3247 if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j); 3248 } 3249 } 3250 #endif 3251 3252 for (i=0; i<m; i++) { 3253 nnz = Ii[i+1]- Ii[i]; 3254 JJ = J + Ii[i]; 3255 nnz_max = PetscMax(nnz_max,nnz); 3256 for (j=0; j<nnz; j++) { 3257 if (*JJ >= cstart) break; 3258 JJ++; 3259 } 3260 d = 0; 3261 for (; j<nnz; j++) { 3262 if (*JJ++ >= cend) break; 3263 d++; 3264 } 3265 d_nnz[i] = d; 3266 o_nnz[i] = nnz - d; 3267 } 3268 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3269 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3270 3271 if (v) values = (PetscScalar*)v; 3272 else { 3273 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3274 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3275 } 3276 3277 for (i=0; i<m; i++) { 3278 ii = i + rstart; 3279 nnz = Ii[i+1]- Ii[i]; 3280 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3281 } 3282 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3283 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3284 3285 if (!v) { 3286 ierr = PetscFree(values);CHKERRQ(ierr); 3287 } 3288 PetscFunctionReturn(0); 3289 } 3290 EXTERN_C_END 3291 3292 #undef __FUNCT__ 3293 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3294 /*@ 3295 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3296 (the default parallel PETSc format). 3297 3298 Collective on MPI_Comm 3299 3300 Input Parameters: 3301 + B - the matrix 3302 . i - the indices into j for the start of each local row (starts with zero) 3303 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3304 - v - optional values in the matrix 3305 3306 Level: developer 3307 3308 Notes: 3309 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3310 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3311 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3312 3313 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3314 3315 The format which is used for the sparse matrix input, is equivalent to a 3316 row-major ordering.. i.e for the following matrix, the input data expected is 3317 as shown: 3318 3319 1 0 0 3320 2 0 3 P0 3321 ------- 3322 4 5 6 P1 3323 3324 Process0 [P0]: rows_owned=[0,1] 3325 i = {0,1,3} [size = nrow+1 = 2+1] 3326 j = {0,0,2} [size = nz = 6] 3327 v = {1,2,3} [size = nz = 6] 3328 3329 Process1 [P1]: rows_owned=[2] 3330 i = {0,3} [size = nrow+1 = 1+1] 3331 j = {0,1,2} [size = nz = 6] 3332 v = {4,5,6} [size = nz = 6] 3333 3334 The column indices for each row MUST be sorted. 3335 3336 .keywords: matrix, aij, compressed row, sparse, parallel 3337 3338 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3339 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3340 @*/ 3341 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3342 { 3343 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3344 3345 PetscFunctionBegin; 3346 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3347 if (f) { 3348 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3349 } 3350 PetscFunctionReturn(0); 3351 } 3352 3353 #undef __FUNCT__ 3354 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3355 /*@C 3356 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3357 (the default parallel PETSc format). For good matrix assembly performance 3358 the user should preallocate the matrix storage by setting the parameters 3359 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3360 performance can be increased by more than a factor of 50. 3361 3362 Collective on MPI_Comm 3363 3364 Input Parameters: 3365 + A - the matrix 3366 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3367 (same value is used for all local rows) 3368 . d_nnz - array containing the number of nonzeros in the various rows of the 3369 DIAGONAL portion of the local submatrix (possibly different for each row) 3370 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
3371 The size of this array is equal to the number of local rows, i.e 'm'. 3372 You must leave room for the diagonal entry even if it is zero. 3373 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3374 submatrix (same value is used for all local rows). 3375 - o_nnz - array containing the number of nonzeros in the various rows of the 3376 OFF-DIAGONAL portion of the local submatrix (possibly different for 3377 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3378 structure. The size of this array is equal to the number 3379 of local rows, i.e 'm'. 3380 3381 If the *_nnz parameter is given then the *_nz parameter is ignored. 3382 3383 The AIJ format (also called the Yale sparse matrix format or 3384 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3385 storage. The stored row and column indices begin with zero. See the users manual for details. 3386 3387 The parallel matrix is partitioned such that the first m0 rows belong to 3388 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3389 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 3390 3391 The DIAGONAL portion of the local submatrix of a processor can be defined 3392 as the submatrix which is obtained by extracting the part corresponding 3393 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3394 first row that belongs to the processor, and r2 is the last row belonging 3395 to this processor. This is a square mxm matrix. The remaining portion 3396 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3397 3398 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3399 3400 You can call MatGetInfo() to get information on how effective the preallocation was; 3401 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3402 You can also run with the option -info and look for messages with the string 3403 malloc in them to see if additional memory allocation was needed. 3404 3405 Example usage: 3406 3407 Consider the following 8x8 matrix with 34 non-zero values, that is 3408 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3409 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3410 as follows: 3411 3412 .vb 3413 1 2 0 | 0 3 0 | 0 4 3414 Proc0 0 5 6 | 7 0 0 | 8 0 3415 9 0 10 | 11 0 0 | 12 0 3416 ------------------------------------- 3417 13 0 14 | 15 16 17 | 0 0 3418 Proc1 0 18 0 | 19 20 21 | 0 0 3419 0 0 0 | 22 23 0 | 24 0 3420 ------------------------------------- 3421 Proc2 25 26 27 | 0 0 28 | 29 0 3422 30 0 0 | 31 32 33 | 0 34 3423 .ve 3424 3425 This can be represented as a collection of submatrices as: 3426 3427 .vb 3428 A B C 3429 D E F 3430 G H I 3431 .ve 3432 3433 Where the submatrices A,B,C are owned by proc0, D,E,F are 3434 owned by proc1, G,H,I are owned by proc2. 3435 3436 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3437 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3438 The 'M','N' parameters are 8,8, and have the same values on all procs. 3439 3440 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3441 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3442 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3443 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3444 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3445 matrix, and [DF] as another SeqAIJ matrix. 
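   A minimal calling sequence for this routine is sketched below; the sizes and per-row
   counts are illustrative (they are the proc0 values worked out in the example that follows),
   comm stands for the MPI communicator, and error checking is omitted:

.vb
   Mat      A;
   PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};   /* per-row counts on this process */

   MatCreate(comm,&A);
   MatSetSizes(A,3,3,8,8);                            /* local m,n and global M,N */
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
   /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve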
3446 3447 When d_nz, o_nz parameters are specified, d_nz storage elements are 3448 allocated for every row of the local diagonal submatrix, and o_nz 3449 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3450 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3451 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3452 In this case, the values of d_nz,o_nz are: 3453 .vb 3454 proc0 : d_nz = 2, o_nz = 2 3455 proc1 : d_nz = 3, o_nz = 2 3456 proc2 : d_nz = 1, o_nz = 4 3457 .ve 3458 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3459 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3460 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3461 34 values. 3462 3463 When d_nnz, o_nnz parameters are specified, the storage is specified 3464 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3465 In the above case the values for d_nnz,o_nnz are: 3466 .vb 3467 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3468 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3469 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3470 .ve 3471 Here the space allocated is the sum of all the above values, i.e. 34, and 3472 hence pre-allocation is perfect. 3473 3474 Level: intermediate 3475 3476 .keywords: matrix, aij, compressed row, sparse, parallel 3477 3478 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3479 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3480 @*/ 3481 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3482 { 3483 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3484 3485 PetscFunctionBegin; 3486 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3487 if (f) { 3488 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3489 } 3490 PetscFunctionReturn(0); 3491 } 3492 3493 #undef __FUNCT__ 3494 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3495 /*@ 3496 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain in standard 3497 CSR format the local rows. 3498 3499 Collective on MPI_Comm 3500 3501 Input Parameters: 3502 + comm - MPI communicator 3503 . m - number of local rows (Cannot be PETSC_DECIDE) 3504 . n - This value should be the same as the local size used in creating the 3505 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3506 calculated if N is given) For square matrices n is almost always m. 3507 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3508 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3509 . i - row indices 3510 . j - column indices 3511 - a - matrix values 3512 3513 Output Parameter: 3514 . mat - the matrix 3515 3516 Level: intermediate 3517 3518 Notes: 3519 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3520 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3521 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3522 3523 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3524 3525 The format which is used for the sparse matrix input is equivalent to a 3526 row-major ordering, 
i.e for the following matrix, the input data expected is 3527 as shown: 3528 3529 1 0 0 3530 2 0 3 P0 3531 ------- 3532 4 5 6 P1 3533 3534 Process0 [P0]: rows_owned=[0,1] 3535 i = {0,1,3} [size = nrow+1 = 2+1] 3536 j = {0,0,2} [size = nz = 6] 3537 v = {1,2,3} [size = nz = 6] 3538 3539 Process1 [P1]: rows_owned=[2] 3540 i = {0,3} [size = nrow+1 = 1+1] 3541 j = {0,1,2} [size = nz = 6] 3542 v = {4,5,6} [size = nz = 6] 3543 3544 .keywords: matrix, aij, compressed row, sparse, parallel 3545 3546 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3547 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3548 @*/ 3549 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3550 { 3551 PetscErrorCode ierr; 3552 3553 PetscFunctionBegin; 3554 if (i[0]) { 3555 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3556 } 3557 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3558 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3559 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3560 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3561 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3562 PetscFunctionReturn(0); 3563 } 3564 3565 #undef __FUNCT__ 3566 #define __FUNCT__ "MatCreateMPIAIJ" 3567 /*@C 3568 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3569 (the default parallel PETSc format). For good matrix assembly performance 3570 the user should preallocate the matrix storage by setting the parameters 3571 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3572 performance can be increased by more than a factor of 50. 3573 3574 Collective on MPI_Comm 3575 3576 Input Parameters: 3577 + comm - MPI communicator 3578 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3579 This value should be the same as the local size used in creating the 3580 y vector for the matrix-vector product y = Ax. 3581 . n - This value should be the same as the local size used in creating the 3582 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3583 calculated if N is given) For square matrices n is almost always m. 3584 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3585 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3586 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3587 (same value is used for all local rows) 3588 . d_nnz - array containing the number of nonzeros in the various rows of the 3589 DIAGONAL portion of the local submatrix (possibly different for each row) 3590 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3591 The size of this array is equal to the number of local rows, i.e 'm'. 3592 You must leave room for the diagonal entry even if it is zero. 3593 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3594 submatrix (same value is used for all local rows). 3595 - o_nnz - array containing the number of nonzeros in the various rows of the 3596 OFF-DIAGONAL portion of the local submatrix (possibly different for 3597 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3598 structure. The size of this array is equal to the number 3599 of local rows, i.e 'm'. 
3600 3601 Output Parameter: 3602 . A - the matrix 3603 3604 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3605 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3606 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3607 3608 Notes: 3609 If the *_nnz parameter is given then the *_nz parameter is ignored. 3610 3611 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3612 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3613 storage requirements for this matrix. 3614 3615 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3616 processor then it must be used on all processors that share the object for 3617 that argument. 3618 3619 The user MUST specify either the local or global matrix dimensions 3620 (possibly both). 3621 3622 The parallel matrix is partitioned across processors such that the 3623 first m0 rows belong to process 0, the next m1 rows belong to 3624 process 1, the next m2 rows belong to process 2 etc., where 3625 m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores 3626 values corresponding to an [m x N] submatrix. 3627 3628 The columns are logically partitioned with the n0 columns belonging 3629 to 0th partition, the next n1 columns belonging to the next 3630 partition etc., where n0,n1,n2... are the input parameter 'n'. 3631 3632 The DIAGONAL portion of the local submatrix on any given processor 3633 is the submatrix corresponding to the rows and columns m,n 3634 corresponding to the given processor, i.e. the diagonal matrix on 3635 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1] 3636 etc. The remaining portion of the local submatrix [m x (N-n)] 3637 constitutes the OFF-DIAGONAL portion. The example below better 3638 illustrates this concept. 3639 3640 For a square global matrix we define each processor's diagonal portion 3641 to be its local rows and the corresponding columns (a square submatrix); 3642 each processor's off-diagonal portion encompasses the remainder of the 3643 local matrix (a rectangular submatrix). 3644 3645 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3646 3647 When calling this routine with a single process communicator, a matrix of 3648 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3649 type of communicator, use the construction mechanism: 3650 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3651 3652 By default, this format uses inodes (identical nodes) when possible. 3653 We search for consecutive rows with the same nonzero structure, thereby 3654 reusing matrix information to achieve increased efficiency. 3655 3656 Options Database Keys: 3657 + -mat_no_inode - Do not use inodes 3658 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3659 - -mat_aij_oneindex - Internally use indexing starting at 1 3660 rather than 0. Note that when calling MatSetValues(), 3661 the user still MUST index entries starting at 0! 3662 3663 3664 Example usage: 3665 3666 Consider the following 8x8 matrix with 34 non-zero values, that is 3667 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3668 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 3669 as follows: 3670 3671 .vb 3672 1 2 0 | 0 3 0 | 0 4 3673 Proc0 0 5 6 | 7 0 0 | 8 0 3674 9 0 10 | 11 0 0 | 12 0 3675 ------------------------------------- 3676 13 0 14 | 15 16 17 | 0 0 3677 Proc1 0 18 0 | 19 20 21 | 0 0 3678 0 0 0 | 22 23 0 | 24 0 3679 ------------------------------------- 3680 Proc2 25 26 27 | 0 0 28 | 29 0 3681 30 0 0 | 31 32 33 | 0 34 3682 .ve 3683 3684 This can be represented as a collection of submatrices as: 3685 3686 .vb 3687 A B C 3688 D E F 3689 G H I 3690 .ve 3691 3692 Where the submatrices A,B,C are owned by proc0, D,E,F are 3693 owned by proc1, G,H,I are owned by proc2. 3694 3695 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3696 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3697 The 'M','N' parameters are 8,8, and have the same values on all procs. 3698 3699 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3700 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3701 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3702 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3703 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3704 matrix, and [DF] as another SeqAIJ matrix. 3705 3706 When d_nz, o_nz parameters are specified, d_nz storage elements are 3707 allocated for every row of the local diagonal submatrix, and o_nz 3708 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3709 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3710 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3711 In this case, the values of d_nz,o_nz are: 3712 .vb 3713 proc0 : d_nz = 2, o_nz = 2 3714 proc1 : d_nz = 3, o_nz = 2 3715 proc2 : d_nz = 1, o_nz = 4 3716 .ve 3717 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3718 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3719 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3720 34 values. 3721 3722 When d_nnz, o_nnz parameters are specified, the storage is specified 3723 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3724 In the above case the values for d_nnz,o_nnz are: 3725 .vb 3726 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3727 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3728 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3729 .ve 3730 Here the space allocated is the sum of all the above values, i.e. 34, and 3731 hence pre-allocation is perfect. 
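   A sketch of the corresponding call on proc0, using the per-row counts above (comm stands
   for the MPI communicator, the values are illustrative, and error checking is omitted):

.vb
   Mat      A;
   PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};

   MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
   /* followed by MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() */
.ve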
3732 3733 Level: intermediate 3734 3735 .keywords: matrix, aij, compressed row, sparse, parallel 3736 3737 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3738 MPIAIJ, MatCreateMPIAIJWithArrays() 3739 @*/ 3740 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3741 { 3742 PetscErrorCode ierr; 3743 PetscMPIInt size; 3744 3745 PetscFunctionBegin; 3746 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3747 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3748 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3749 if (size > 1) { 3750 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3751 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3752 } else { 3753 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3754 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3755 } 3756 PetscFunctionReturn(0); 3757 } 3758 3759 #undef __FUNCT__ 3760 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3761 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3762 { 3763 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3764 3765 PetscFunctionBegin; 3766 *Ad = a->A; 3767 *Ao = a->B; 3768 *colmap = a->garray; 3769 PetscFunctionReturn(0); 3770 } 3771 3772 #undef __FUNCT__ 3773 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3774 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3775 { 3776 PetscErrorCode ierr; 3777 PetscInt i; 3778 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3779 3780 PetscFunctionBegin; 3781 if (coloring->ctype == IS_COLORING_GLOBAL) { 3782 ISColoringValue *allcolors,*colors; 3783 ISColoring ocoloring; 3784 3785 /* set coloring for diagonal portion */ 3786 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3787 3788 /* set coloring for off-diagonal portion */ 3789 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3790 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3791 for (i=0; i<a->B->cmap->n; i++) { 3792 colors[i] = allcolors[a->garray[i]]; 3793 } 3794 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3795 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3796 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3797 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3798 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3799 ISColoringValue *colors; 3800 PetscInt *larray; 3801 ISColoring ocoloring; 3802 3803 /* set coloring for diagonal portion */ 3804 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3805 for (i=0; i<a->A->cmap->n; i++) { 3806 larray[i] = i + A->cmap->rstart; 3807 } 3808 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3809 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3810 for (i=0; i<a->A->cmap->n; i++) { 3811 colors[i] = coloring->colors[larray[i]]; 3812 } 3813 ierr = PetscFree(larray);CHKERRQ(ierr); 3814 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3815 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3816 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3817 3818 /* set coloring for off-diagonal portion */ 3819 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3820 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3821 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3822 for (i=0; i<a->B->cmap->n; i++) { 3823 colors[i] = coloring->colors[larray[i]]; 3824 } 3825 ierr = PetscFree(larray);CHKERRQ(ierr); 3826 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3827 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3828 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3829 } else { 3830 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3831 } 3832 3833 PetscFunctionReturn(0); 3834 } 3835 3836 #if defined(PETSC_HAVE_ADIC) 3837 #undef __FUNCT__ 3838 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3839 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3840 { 3841 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3842 PetscErrorCode ierr; 3843 3844 PetscFunctionBegin; 3845 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3846 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3847 PetscFunctionReturn(0); 3848 } 3849 #endif 3850 3851 #undef __FUNCT__ 3852 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3853 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3854 { 3855 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3856 PetscErrorCode ierr; 3857 3858 PetscFunctionBegin; 3859 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3860 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3861 PetscFunctionReturn(0); 3862 } 3863 3864 #undef __FUNCT__ 3865 #define __FUNCT__ "MatMerge" 3866 /*@ 3867 MatMerge - Creates a single large PETSc matrix by concatenating sequential 3868 matrices from each processor 3869 3870 Collective on MPI_Comm 3871 3872 Input Parameters: 3873 + comm - the communicator the parallel matrix will live on 3874 . inmat - the input sequential matrix (each process passes its own) 3875 . n - number of local columns (or PETSC_DECIDE) 3876 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3877 3878 Output Parameter: 3879 . outmat - the parallel matrix generated 3880 3881 Level: advanced 3882 3883 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
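   A sketch of typical use follows; localmat is an illustrative name for the SeqAIJ matrix each
   process has assembled on PETSC_COMM_SELF, and comm is the parallel communicator. Note that
   this routine destroys the input matrix (see the MatDestroy(inmat) call in the body below):

.vb
   Mat localmat,globalmat;
   /* ... assemble localmat as a SeqAIJ matrix ... */
   MatMerge(comm,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalmat);
   /* localmat has been destroyed; globalmat is the row-concatenated MPIAIJ matrix on comm */
.ve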
3884 3885 @*/ 3886 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3887 { 3888 PetscErrorCode ierr; 3889 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3890 PetscInt *indx; 3891 PetscScalar *values; 3892 3893 PetscFunctionBegin; 3894 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3895 if (scall == MAT_INITIAL_MATRIX){ 3896 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3897 if (n == PETSC_DECIDE){ 3898 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3899 } 3900 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3901 rstart -= m; 3902 3903 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3904 for (i=0;i<m;i++) { 3905 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3906 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3907 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3908 } 3909 /* This routine will ONLY return MPIAIJ type matrix */ 3910 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3911 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3912 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3913 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3914 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3915 3916 } else if (scall == MAT_REUSE_MATRIX){ 3917 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3918 } else { 3919 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3920 } 3921 3922 for (i=0;i<m;i++) { 3923 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3924 Ii = i + rstart; 3925 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3926 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3927 } 3928 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3929 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3930 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3931 3932 PetscFunctionReturn(0); 3933 } 3934 3935 #undef __FUNCT__ 3936 #define __FUNCT__ "MatFileSplit" 3937 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3938 { 3939 PetscErrorCode ierr; 3940 PetscMPIInt rank; 3941 PetscInt m,N,i,rstart,nnz; 3942 size_t len; 3943 const PetscInt *indx; 3944 PetscViewer out; 3945 char *name; 3946 Mat B; 3947 const PetscScalar *values; 3948 3949 PetscFunctionBegin; 3950 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3951 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3952 /* Should this be the type of the diagonal block of A? 
*/ 3953 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3954 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3955 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3956 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3957 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3958 for (i=0;i<m;i++) { 3959 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3960 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3961 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3962 } 3963 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3964 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3965 3966 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3967 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3968 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3969 sprintf(name,"%s.%d",outfile,rank); 3970 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3971 ierr = PetscFree(name); 3972 ierr = MatView(B,out);CHKERRQ(ierr); 3973 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3974 ierr = MatDestroy(B);CHKERRQ(ierr); 3975 PetscFunctionReturn(0); 3976 } 3977 3978 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3979 #undef __FUNCT__ 3980 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3981 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3982 { 3983 PetscErrorCode ierr; 3984 Mat_Merge_SeqsToMPI *merge; 3985 PetscContainer container; 3986 3987 PetscFunctionBegin; 3988 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3989 if (container) { 3990 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3991 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3992 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3993 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3994 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3995 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3996 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3997 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3998 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3999 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4000 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4001 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 4002 4003 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4004 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4005 } 4006 ierr = PetscFree(merge);CHKERRQ(ierr); 4007 4008 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4009 PetscFunctionReturn(0); 4010 } 4011 4012 #include "../src/mat/utils/freespace.h" 4013 #include "petscbt.h" 4014 4015 #undef __FUNCT__ 4016 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4017 /*@C 4018 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4019 matrices from each processor 4020 4021 Collective on MPI_Comm 4022 4023 Input Parameters: 4024 + comm - the communicators the parallel matrix will live on 4025 . seqmat - the input sequential matrices 4026 . m - number of local rows (or PETSC_DECIDE) 4027 . n - number of local columns (or PETSC_DECIDE) 4028 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4029 4030 Output Parameter: 4031 . mpimat - the parallel matrix generated 4032 4033 Level: advanced 4034 4035 Notes: 4036 The dimensions of the sequential matrix in each processor MUST be the same. 4037 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4038 destroyed when mpimat is destroyed. 
Call PetscObjectQuery() to access seqmat. 4039 @*/ 4040 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4041 { 4042 PetscErrorCode ierr; 4043 MPI_Comm comm=((PetscObject)mpimat)->comm; 4044 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4045 PetscMPIInt size,rank,taga,*len_s; 4046 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4047 PetscInt proc,m; 4048 PetscInt **buf_ri,**buf_rj; 4049 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4050 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4051 MPI_Request *s_waits,*r_waits; 4052 MPI_Status *status; 4053 MatScalar *aa=a->a; 4054 MatScalar **abuf_r,*ba_i; 4055 Mat_Merge_SeqsToMPI *merge; 4056 PetscContainer container; 4057 4058 PetscFunctionBegin; 4059 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4060 4061 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4062 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4063 4064 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4065 if (container) { 4066 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4067 } 4068 bi = merge->bi; 4069 bj = merge->bj; 4070 buf_ri = merge->buf_ri; 4071 buf_rj = merge->buf_rj; 4072 4073 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4074 owners = merge->rowmap.range; 4075 len_s = merge->len_s; 4076 4077 /* send and recv matrix values */ 4078 /*-----------------------------*/ 4079 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4080 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4081 4082 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4083 for (proc=0,k=0; proc<size; proc++){ 4084 if (!len_s[proc]) continue; 4085 i = owners[proc]; 4086 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4087 k++; 4088 } 4089 4090 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4091 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4092 ierr = PetscFree(status);CHKERRQ(ierr); 4093 4094 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4095 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4096 4097 /* insert mat values of mpimat */ 4098 /*----------------------------*/ 4099 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4100 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4101 nextrow = buf_ri_k + merge->nrecv; 4102 nextai = nextrow + merge->nrecv; 4103 4104 for (k=0; k<merge->nrecv; k++){ 4105 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4106 nrows = *(buf_ri_k[k]); 4107 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4108 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4109 } 4110 4111 /* set values of ba */ 4112 m = merge->rowmap.n; 4113 for (i=0; i<m; i++) { 4114 arow = owners[rank] + i; 4115 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4116 bnzi = bi[i+1] - bi[i]; 4117 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4118 4119 /* add local non-zero vals of this proc's seqmat into ba */ 4120 anzi = ai[arow+1] - ai[arow]; 4121 aj = a->j + ai[arow]; 4122 aa = a->a + ai[arow]; 4123 nextaj = 0; 4124 for (j=0; nextaj<anzi; j++){ 4125 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4126 ba_i[j] += aa[nextaj++]; 4127 } 4128 } 4129 4130 /* add 
received vals into ba */ 4131 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4132 /* i-th row */ 4133 if (i == *nextrow[k]) { 4134 anzi = *(nextai[k]+1) - *nextai[k]; 4135 aj = buf_rj[k] + *(nextai[k]); 4136 aa = abuf_r[k] + *(nextai[k]); 4137 nextaj = 0; 4138 for (j=0; nextaj<anzi; j++){ 4139 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4140 ba_i[j] += aa[nextaj++]; 4141 } 4142 } 4143 nextrow[k]++; nextai[k]++; 4144 } 4145 } 4146 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4147 } 4148 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4149 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4150 4151 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4152 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4153 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4154 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4155 PetscFunctionReturn(0); 4156 } 4157 4158 #undef __FUNCT__ 4159 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4160 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4161 { 4162 PetscErrorCode ierr; 4163 Mat B_mpi; 4164 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4165 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4166 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4167 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4168 PetscInt len,proc,*dnz,*onz; 4169 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4170 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4171 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4172 MPI_Status *status; 4173 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4174 PetscBT lnkbt; 4175 Mat_Merge_SeqsToMPI *merge; 4176 PetscContainer container; 4177 4178 PetscFunctionBegin; 4179 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4180 4181 /* make sure it is a PETSc comm */ 4182 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4183 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4184 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4185 4186 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4187 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4188 4189 /* determine row ownership */ 4190 /*---------------------------------------------------------*/ 4191 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 4192 merge->rowmap.n = m; 4193 merge->rowmap.N = M; 4194 merge->rowmap.bs = 1; 4195 ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr); 4196 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4197 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4198 4199 m = merge->rowmap.n; 4200 M = merge->rowmap.N; 4201 owners = merge->rowmap.range; 4202 4203 /* determine the number of messages to send, their lengths */ 4204 /*---------------------------------------------------------*/ 4205 len_s = merge->len_s; 4206 4207 len = 0; /* length of buf_si[] */ 4208 merge->nsend = 0; 4209 for (proc=0; proc<size; proc++){ 4210 len_si[proc] = 0; 4211 if (proc == rank){ 4212 len_s[proc] = 0; 4213 } else { 4214 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4215 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4216 } 4217 if (len_s[proc]) { 4218 merge->nsend++; 4219 nrows = 0; 4220 for (i=owners[proc]; i<owners[proc+1]; i++){ 4221 if (ai[i+1] > ai[i]) nrows++; 4222 } 4223 len_si[proc] = 2*(nrows+1); 4224 len += 
len_si[proc]; 4225 } 4226 } 4227 4228 /* determine the number and length of messages to receive for ij-structure */ 4229 /*-------------------------------------------------------------------------*/ 4230 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4231 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4232 4233 /* post the Irecv of j-structure */ 4234 /*-------------------------------*/ 4235 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4236 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4237 4238 /* post the Isend of j-structure */ 4239 /*--------------------------------*/ 4240 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4241 sj_waits = si_waits + merge->nsend; 4242 4243 for (proc=0, k=0; proc<size; proc++){ 4244 if (!len_s[proc]) continue; 4245 i = owners[proc]; 4246 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4247 k++; 4248 } 4249 4250 /* receives and sends of j-structure are complete */ 4251 /*------------------------------------------------*/ 4252 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4253 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4254 4255 /* send and recv i-structure */ 4256 /*---------------------------*/ 4257 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4258 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4259 4260 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4261 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4262 for (proc=0,k=0; proc<size; proc++){ 4263 if (!len_s[proc]) continue; 4264 /* form outgoing message for i-structure: 4265 buf_si[0]: nrows to be sent 4266 [1:nrows]: row index (global) 4267 [nrows+1:2*nrows+1]: i-structure index 4268 */ 4269 /*-------------------------------------------*/ 4270 nrows = len_si[proc]/2 - 1; 4271 buf_si_i = buf_si + nrows+1; 4272 buf_si[0] = nrows; 4273 buf_si_i[0] = 0; 4274 nrows = 0; 4275 for (i=owners[proc]; i<owners[proc+1]; i++){ 4276 anzi = ai[i+1] - ai[i]; 4277 if (anzi) { 4278 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4279 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4280 nrows++; 4281 } 4282 } 4283 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4284 k++; 4285 buf_si += len_si[proc]; 4286 } 4287 4288 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4289 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4290 4291 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4292 for (i=0; i<merge->nrecv; i++){ 4293 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4294 } 4295 4296 ierr = PetscFree(len_si);CHKERRQ(ierr); 4297 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4298 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4299 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4300 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4301 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4302 ierr = PetscFree(status);CHKERRQ(ierr); 4303 4304 /* compute a local seq matrix in each processor */ 4305 /*----------------------------------------------*/ 4306 /* allocate bi array and 
free space for accumulating nonzero column info */ 4307 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4308 bi[0] = 0; 4309 4310 /* create and initialize a linked list */ 4311 nlnk = N+1; 4312 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4313 4314 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4315 len = 0; 4316 len = ai[owners[rank+1]] - ai[owners[rank]]; 4317 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4318 current_space = free_space; 4319 4320 /* determine symbolic info for each local row */ 4321 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4322 nextrow = buf_ri_k + merge->nrecv; 4323 nextai = nextrow + merge->nrecv; 4324 for (k=0; k<merge->nrecv; k++){ 4325 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4326 nrows = *buf_ri_k[k]; 4327 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4328 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */ 4329 } 4330 4331 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4332 len = 0; 4333 for (i=0;i<m;i++) { 4334 bnzi = 0; 4335 /* add local non-zero cols of this proc's seqmat into lnk */ 4336 arow = owners[rank] + i; 4337 anzi = ai[arow+1] - ai[arow]; 4338 aj = a->j + ai[arow]; 4339 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4340 bnzi += nlnk; 4341 /* add received col data into lnk */ 4342 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4343 if (i == *nextrow[k]) { /* i-th row */ 4344 anzi = *(nextai[k]+1) - *nextai[k]; 4345 aj = buf_rj[k] + *nextai[k]; 4346 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4347 bnzi += nlnk; 4348 nextrow[k]++; nextai[k]++; 4349 } 4350 } 4351 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4352 4353 /* if free space is not available, make more free space */ 4354 if (current_space->local_remaining<bnzi) { 4355 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4356 nspacedouble++; 4357 } 4358 /* copy data into free space, then initialize lnk */ 4359 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4360 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4361 4362 current_space->array += bnzi; 4363 current_space->local_used += bnzi; 4364 current_space->local_remaining -= bnzi; 4365 4366 bi[i+1] = bi[i] + bnzi; 4367 } 4368 4369 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4370 4371 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4372 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4373 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4374 4375 /* create symbolic parallel matrix B_mpi */ 4376 /*---------------------------------------*/ 4377 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4378 if (n==PETSC_DECIDE) { 4379 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4380 } else { 4381 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4382 } 4383 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4384 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4385 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4386 4387 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4388 B_mpi->assembled = PETSC_FALSE; 4389 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4390 merge->bi = bi; 4391 merge->bj = bj; 4392 merge->buf_ri = buf_ri; 4393 merge->buf_rj = 
buf_rj; 4394 merge->coi = PETSC_NULL; 4395 merge->coj = PETSC_NULL; 4396 merge->owners_co = PETSC_NULL; 4397 4398 /* attach the supporting struct to B_mpi for reuse */ 4399 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4400 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4401 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4402 *mpimat = B_mpi; 4403 4404 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4405 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4406 PetscFunctionReturn(0); 4407 } 4408 4409 #undef __FUNCT__ 4410 #define __FUNCT__ "MatMerge_SeqsToMPI" 4411 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4412 { 4413 PetscErrorCode ierr; 4414 4415 PetscFunctionBegin; 4416 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4417 if (scall == MAT_INITIAL_MATRIX){ 4418 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4419 } 4420 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4421 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4422 PetscFunctionReturn(0); 4423 } 4424 4425 #undef __FUNCT__ 4426 #define __FUNCT__ "MatGetLocalMat" 4427 /*@ 4428 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4429 4430 Not Collective 4431 4432 Input Parameters: 4433 + A - the matrix 4434 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4435 4436 Output Parameter: 4437 . A_loc - the local sequential matrix generated 4438 4439 Level: developer 4440 4441 @*/ 4442 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4443 { 4444 PetscErrorCode ierr; 4445 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4446 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4447 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4448 MatScalar *aa=a->a,*ba=b->a,*cam; 4449 PetscScalar *ca; 4450 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4451 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4452 4453 PetscFunctionBegin; 4454 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4455 if (scall == MAT_INITIAL_MATRIX){ 4456 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4457 ci[0] = 0; 4458 for (i=0; i<am; i++){ 4459 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4460 } 4461 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4462 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4463 k = 0; 4464 for (i=0; i<am; i++) { 4465 ncols_o = bi[i+1] - bi[i]; 4466 ncols_d = ai[i+1] - ai[i]; 4467 /* off-diagonal portion of A */ 4468 for (jo=0; jo<ncols_o; jo++) { 4469 col = cmap[*bj]; 4470 if (col >= cstart) break; 4471 cj[k] = col; bj++; 4472 ca[k++] = *ba++; 4473 } 4474 /* diagonal portion of A */ 4475 for (j=0; j<ncols_d; j++) { 4476 cj[k] = cstart + *aj++; 4477 ca[k++] = *aa++; 4478 } 4479 /* off-diagonal portion of A */ 4480 for (j=jo; j<ncols_o; j++) { 4481 cj[k] = cmap[*bj++]; 4482 ca[k++] = *ba++; 4483 } 4484 } 4485 /* put together the new matrix */ 4486 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4487 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4488 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4489 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4490 mat->free_a = PETSC_TRUE; 4491 mat->free_ij = PETSC_TRUE; 4492 mat->nonew = 0; 4493 } else if (scall == MAT_REUSE_MATRIX){ 4494 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4495 ci = mat->i; cj = mat->j; cam = mat->a; 4496 for (i=0; i<am; i++) { 4497 /* off-diagonal portion of A */ 4498 ncols_o = bi[i+1] - bi[i]; 4499 for (jo=0; jo<ncols_o; jo++) { 4500 col = cmap[*bj]; 4501 if (col >= cstart) break; 4502 *cam++ = *ba++; bj++; 4503 } 4504 /* diagonal portion of A */ 4505 ncols_d = ai[i+1] - ai[i]; 4506 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4507 /* off-diagonal portion of A */ 4508 for (j=jo; j<ncols_o; j++) { 4509 *cam++ = *ba++; bj++; 4510 } 4511 } 4512 } else { 4513 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4514 } 4515 4516 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4517 PetscFunctionReturn(0); 4518 } 4519 4520 #undef __FUNCT__ 4521 #define __FUNCT__ "MatGetLocalMatCondensed" 4522 /*@C 4523 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4524 4525 Not Collective 4526 4527 Input Parameters: 4528 + A - the matrix 4529 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4530 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4531 4532 Output Parameter: 4533 . A_loc - the local sequential matrix generated 4534 4535 Level: developer 4536 4537 @*/ 4538 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4539 { 4540 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4541 PetscErrorCode ierr; 4542 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4543 IS isrowa,iscola; 4544 Mat *aloc; 4545 4546 PetscFunctionBegin; 4547 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4548 if (!row){ 4549 start = A->rmap->rstart; end = A->rmap->rend; 4550 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4551 } else { 4552 isrowa = *row; 4553 } 4554 if (!col){ 4555 start = A->cmap->rstart; 4556 cmap = a->garray; 4557 nzA = a->A->cmap->n; 4558 nzB = a->B->cmap->n; 4559 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4560 ncols = 0; 4561 for (i=0; i<nzB; i++) { 4562 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4563 else break; 4564 } 4565 imark = i; 4566 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4567 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4568 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4569 ierr = PetscFree(idx);CHKERRQ(ierr); 4570 } else { 4571 iscola = *col; 4572 } 4573 if (scall != MAT_INITIAL_MATRIX){ 4574 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4575 aloc[0] = *A_loc; 4576 } 4577 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4578 *A_loc = aloc[0]; 4579 ierr = PetscFree(aloc);CHKERRQ(ierr); 4580 if (!row){ 4581 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4582 } 4583 if (!col){ 4584 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4585 } 4586 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4587 PetscFunctionReturn(0); 4588 } 4589 4590 #undef __FUNCT__ 4591 #define __FUNCT__ "MatGetBrowsOfAcols" 4592 /*@C 4593 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4594 4595 Collective on Mat 4596 4597 Input Parameters: 4598 + A,B - the matrices in mpiaij format 4599 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4600 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4601 4602 Output Parameters: 4603 + rowb, colb - index sets of rows and columns of B to extract 4604 . brstart - row index in B_seq at which B's own local rows begin; the next B->rmap->n rows of B_seq are taken from B's local rows 4605 - B_seq - the sequential matrix generated 4606 4607 Level: developer 4608 4609 @*/ 4610 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4611 { 4612 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4613 PetscErrorCode ierr; 4614 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4615 IS isrowb,iscolb; 4616 Mat *bseq; 4617 4618 PetscFunctionBegin; 4619 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4620 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4621 } 4622 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4623 4624 if (scall == MAT_INITIAL_MATRIX){ 4625 start = A->cmap->rstart; 4626 cmap = a->garray; 4627 nzA = a->A->cmap->n; 4628 nzB = a->B->cmap->n; 4629 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4630 ncols = 0; 4631 for (i=0; i<nzB; i++) { /* row < local row index */ 4632 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4633 else break; 4634 } 4635 imark = i; 4636 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4637 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4638 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4639 ierr = PetscFree(idx);CHKERRQ(ierr); 4640 *brstart = imark; 4641 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4642 } else { 4643 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4644 isrowb = *rowb; iscolb = *colb; 4645 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4646 bseq[0] = *B_seq; 4647 } 4648 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4649 *B_seq = bseq[0]; 4650 ierr = PetscFree(bseq);CHKERRQ(ierr); 4651 if (!rowb){ 4652 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4653 } else { 4654 *rowb = isrowb; 4655 } 4656 if (!colb){ 4657 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4658 } else { 4659 *colb = iscolb; 4660 } 4661 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4662 PetscFunctionReturn(0); 4663 } 4664 4665 #undef __FUNCT__ 4666 #define __FUNCT__ "MatGetBrowsOfAoCols" 4667 /*@C 4668 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 4669 of the OFF-DIAGONAL portion of local A 4670 4671 Collective on Mat 4672 4673 Input Parameters: 4674 + A,B - the matrices in mpiaij format 4675 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4676 .
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4677 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4678 4679 Output Parameter: 4680 + B_oth - the sequential matrix generated 4681 4682 Level: developer 4683 4684 @*/ 4685 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4686 { 4687 VecScatter_MPI_General *gen_to,*gen_from; 4688 PetscErrorCode ierr; 4689 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4690 Mat_SeqAIJ *b_oth; 4691 VecScatter ctx=a->Mvctx; 4692 MPI_Comm comm=((PetscObject)ctx)->comm; 4693 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4694 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4695 PetscScalar *rvalues,*svalues; 4696 MatScalar *b_otha,*bufa,*bufA; 4697 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4698 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4699 MPI_Status *sstatus,rstatus; 4700 PetscMPIInt jj; 4701 PetscInt *cols,sbs,rbs; 4702 PetscScalar *vals; 4703 4704 PetscFunctionBegin; 4705 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4706 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4707 } 4708 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4709 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4710 4711 gen_to = (VecScatter_MPI_General*)ctx->todata; 4712 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4713 rvalues = gen_from->values; /* holds the length of receiving row */ 4714 svalues = gen_to->values; /* holds the length of sending row */ 4715 nrecvs = gen_from->n; 4716 nsends = gen_to->n; 4717 4718 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4719 srow = gen_to->indices; /* local row index to be sent */ 4720 sstarts = gen_to->starts; 4721 sprocs = gen_to->procs; 4722 sstatus = gen_to->sstatus; 4723 sbs = gen_to->bs; 4724 rstarts = gen_from->starts; 4725 rprocs = gen_from->procs; 4726 rbs = gen_from->bs; 4727 4728 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4729 if (scall == MAT_INITIAL_MATRIX){ 4730 /* i-array */ 4731 /*---------*/ 4732 /* post receives */ 4733 for (i=0; i<nrecvs; i++){ 4734 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4735 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4736 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4737 } 4738 4739 /* pack the outgoing message */ 4740 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4741 rstartsj = sstartsj + nsends +1; 4742 sstartsj[0] = 0; rstartsj[0] = 0; 4743 len = 0; /* total length of j or a array to be sent */ 4744 k = 0; 4745 for (i=0; i<nsends; i++){ 4746 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4747 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4748 for (j=0; j<nrows; j++) { 4749 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4750 for (l=0; l<sbs; l++){ 4751 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4752 rowlen[j*sbs+l] = ncols; 4753 len += ncols; 4754 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4755 } 4756 k++; 4757 } 4758 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4759 
sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4760 } 4761 /* recvs and sends of i-array are completed */ 4762 i = nrecvs; 4763 while (i--) { 4764 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4765 } 4766 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4767 4768 /* allocate buffers for sending j and a arrays */ 4769 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4770 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4771 4772 /* create i-array of B_oth */ 4773 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4774 b_othi[0] = 0; 4775 len = 0; /* total length of j or a array to be received */ 4776 k = 0; 4777 for (i=0; i<nrecvs; i++){ 4778 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4779 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4780 for (j=0; j<nrows; j++) { 4781 b_othi[k+1] = b_othi[k] + rowlen[j]; 4782 len += rowlen[j]; k++; 4783 } 4784 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4785 } 4786 4787 /* allocate space for j and a arrays of B_oth */ 4788 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4789 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4790 4791 /* j-array */ 4792 /*---------*/ 4793 /* post receives of j-array */ 4794 for (i=0; i<nrecvs; i++){ 4795 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4796 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4797 } 4798 4799 /* pack the outgoing message j-array */ 4800 k = 0; 4801 for (i=0; i<nsends; i++){ 4802 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4803 bufJ = bufj+sstartsj[i]; 4804 for (j=0; j<nrows; j++) { 4805 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4806 for (ll=0; ll<sbs; ll++){ 4807 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4808 for (l=0; l<ncols; l++){ 4809 *bufJ++ = cols[l]; 4810 } 4811 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4812 } 4813 } 4814 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4815 } 4816 4817 /* recvs and sends of j-array are completed */ 4818 i = nrecvs; 4819 while (i--) { 4820 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4821 } 4822 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4823 } else if (scall == MAT_REUSE_MATRIX){ 4824 sstartsj = *startsj; 4825 rstartsj = sstartsj + nsends +1; 4826 bufa = *bufa_ptr; 4827 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4828 b_otha = b_oth->a; 4829 } else { 4830 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4831 } 4832 4833 /* a-array */ 4834 /*---------*/ 4835 /* post receives of a-array */ 4836 for (i=0; i<nrecvs; i++){ 4837 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4838 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4839 } 4840 4841 /* pack the outgoing message a-array */ 4842 k = 0; 4843 for (i=0; i<nsends; i++){ 4844 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4845 bufA = bufa+sstartsj[i]; 4846 for (j=0; j<nrows; j++) { 4847 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4848 for (ll=0; ll<sbs; ll++){ 4849 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4850 for (l=0; l<ncols; l++){
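/* copy the numerical values of this row of B into the contiguous send buffer bufa */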
4851 *bufA++ = vals[l]; 4852 } 4853 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4854 } 4855 } 4856 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4857 } 4858 /* recvs and sends of a-array are completed */ 4859 i = nrecvs; 4860 while (i--) { 4861 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4862 } 4863 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4864 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4865 4866 if (scall == MAT_INITIAL_MATRIX){ 4867 /* put together the new matrix */ 4868 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4869 4870 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4871 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4872 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4873 b_oth->free_a = PETSC_TRUE; 4874 b_oth->free_ij = PETSC_TRUE; 4875 b_oth->nonew = 0; 4876 4877 ierr = PetscFree(bufj);CHKERRQ(ierr); 4878 if (!startsj || !bufa_ptr){ 4879 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4880 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4881 } else { 4882 *startsj = sstartsj; 4883 *bufa_ptr = bufa; 4884 } 4885 } 4886 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4887 PetscFunctionReturn(0); 4888 } 4889 4890 #undef __FUNCT__ 4891 #define __FUNCT__ "MatGetCommunicationStructs" 4892 /*@C 4893 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4894 4895 Not Collective 4896 4897 Input Parameters: 4898 . A - The matrix in mpiaij format 4899 4900 Output Parameter: 4901 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4902 . 
colmap - A map from global column index to local index into lvec 4903 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4904 4905 Level: developer 4906 4907 @*/ 4908 #if defined (PETSC_USE_CTABLE) 4909 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4910 #else 4911 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4912 #endif 4913 { 4914 Mat_MPIAIJ *a; 4915 4916 PetscFunctionBegin; 4917 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4918 PetscValidPointer(lvec, 2) 4919 PetscValidPointer(colmap, 3) 4920 PetscValidPointer(multScatter, 4) 4921 a = (Mat_MPIAIJ *) A->data; 4922 if (lvec) *lvec = a->lvec; 4923 if (colmap) *colmap = a->colmap; 4924 if (multScatter) *multScatter = a->Mvctx; 4925 PetscFunctionReturn(0); 4926 } 4927 4928 EXTERN_C_BEGIN 4929 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4930 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4931 EXTERN_C_END 4932 4933 #include "../src/mat/impls/dense/mpi/mpidense.h" 4934 4935 #undef __FUNCT__ 4936 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4937 /* 4938 Computes (B'*A')' since computing B*A directly is untenable 4939 4940 n p p 4941 ( ) ( ) ( ) 4942 m ( A ) * n ( B ) = m ( C ) 4943 ( ) ( ) ( ) 4944 4945 */ 4946 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4947 { 4948 PetscErrorCode ierr; 4949 Mat At,Bt,Ct; 4950 4951 PetscFunctionBegin; 4952 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4953 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4954 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4955 ierr = MatDestroy(At);CHKERRQ(ierr); 4956 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4957 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4958 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4959 PetscFunctionReturn(0); 4960 } 4961 4962 #undef __FUNCT__ 4963 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4964 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4965 { 4966 PetscErrorCode ierr; 4967 PetscInt m=A->rmap->n,n=B->cmap->n; 4968 Mat Cmat; 4969 4970 PetscFunctionBegin; 4971 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4972 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4973 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4974 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4975 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4976 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4977 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4978 *C = Cmat; 4979 PetscFunctionReturn(0); 4980 } 4981 4982 /* ----------------------------------------------------------------*/ 4983 #undef __FUNCT__ 4984 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4985 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4986 { 4987 PetscErrorCode ierr; 4988 4989 PetscFunctionBegin; 4990 if (scall == MAT_INITIAL_MATRIX){ 4991 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4992 } 4993 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4994 PetscFunctionReturn(0); 4995 } 4996 4997 EXTERN_C_BEGIN 4998 #if defined(PETSC_HAVE_MUMPS) 4999 extern 
PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 5000 #endif 5001 #if defined(PETSC_HAVE_PASTIX) 5002 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5003 #endif 5004 #if defined(PETSC_HAVE_SUPERLU_DIST) 5005 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5006 #endif 5007 #if defined(PETSC_HAVE_SPOOLES) 5008 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5009 #endif 5010 EXTERN_C_END 5011 5012 /*MC 5013 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5014 5015 Options Database Keys: 5016 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5017 5018 Level: beginner 5019 5020 .seealso: MatCreateMPIAIJ() 5021 M*/ 5022 5023 EXTERN_C_BEGIN 5024 #undef __FUNCT__ 5025 #define __FUNCT__ "MatCreate_MPIAIJ" 5026 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5027 { 5028 Mat_MPIAIJ *b; 5029 PetscErrorCode ierr; 5030 PetscMPIInt size; 5031 5032 PetscFunctionBegin; 5033 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5034 5035 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5036 B->data = (void*)b; 5037 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5038 B->rmap->bs = 1; 5039 B->assembled = PETSC_FALSE; 5040 B->mapping = 0; 5041 5042 B->insertmode = NOT_SET_VALUES; 5043 b->size = size; 5044 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5045 5046 /* build cache for off array entries formed */ 5047 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5048 b->donotstash = PETSC_FALSE; 5049 b->colmap = 0; 5050 b->garray = 0; 5051 b->roworiented = PETSC_TRUE; 5052 5053 /* stuff used for matrix vector multiply */ 5054 b->lvec = PETSC_NULL; 5055 b->Mvctx = PETSC_NULL; 5056 5057 /* stuff for MatGetRow() */ 5058 b->rowindices = 0; 5059 b->rowvalues = 0; 5060 b->getrowactive = PETSC_FALSE; 5061 5062 #if defined(PETSC_HAVE_SPOOLES) 5063 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C", 5064 "MatGetFactor_mpiaij_spooles", 5065 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5066 #endif 5067 #if defined(PETSC_HAVE_MUMPS) 5068 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C", 5069 "MatGetFactor_mpiaij_mumps", 5070 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5071 #endif 5072 #if defined(PETSC_HAVE_PASTIX) 5073 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C", 5074 "MatGetFactor_mpiaij_pastix", 5075 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5076 #endif 5077 #if defined(PETSC_HAVE_SUPERLU_DIST) 5078 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C", 5079 "MatGetFactor_mpiaij_superlu_dist", 5080 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5081 #endif 5082 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5083 "MatStoreValues_MPIAIJ", 5084 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5085 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5086 "MatRetrieveValues_MPIAIJ", 5087 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5088 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5089 "MatGetDiagonalBlock_MPIAIJ", 5090 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5091 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5092 "MatIsTranspose_MPIAIJ", 5093 
MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5094 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5095 "MatMPIAIJSetPreallocation_MPIAIJ", 5096 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5097 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5098 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5099 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5100 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5101 "MatDiagonalScaleLocal_MPIAIJ", 5102 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5103 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5104 "MatConvert_MPIAIJ_MPICSRPERM", 5105 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5106 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5107 "MatConvert_MPIAIJ_MPICRL", 5108 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5109 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5110 "MatMatMult_MPIDense_MPIAIJ", 5111 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5112 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5113 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5114 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5115 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5116 "MatMatMultNumeric_MPIDense_MPIAIJ", 5117 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5118 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5119 PetscFunctionReturn(0); 5120 } 5121 EXTERN_C_END 5122 5123 #undef __FUNCT__ 5124 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5125 /*@ 5126 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5127 and "off-diagonal" part of the matrix in CSR format. 5128 5129 Collective on MPI_Comm 5130 5131 Input Parameters: 5132 + comm - MPI communicator 5133 . m - number of local rows (Cannot be PETSC_DECIDE) 5134 . n - This value should be the same as the local size used in creating the 5135 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5136 calculated if N is given) For square matrices n is almost always m. 5137 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5138 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5139 . i - row indices for "diagonal" portion of matrix 5140 . j - column indices 5141 . a - matrix values 5142 . oi - row indices for "off-diagonal" portion of matrix 5143 . oj - column indices 5144 - oa - matrix values 5145 5146 Output Parameter: 5147 . mat - the matrix 5148 5149 Level: advanced 5150 5151 Notes: 5152 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5153 5154 The i and j indices are 0 based 5155 5156 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5157 5158 This sets local rows and cannot be used to set off-processor values. 5159 5160 You cannot later use MatSetValues() to change values in this matrix. 
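   Example usage (a minimal sketch only; the local sizes and the array contents below are
   illustrative assumptions, with one entry per local row in the "diagonal" block and no
   "off-diagonal" entries):

$     PetscInt       di[3] = {0,1,2},  dj[2] = {0,1};
$     PetscScalar    da[2] = {1.0,2.0};
$     PetscInt       oi[3] = {0,0,0},  oj[1] = {0};
$     PetscScalar    oa[1] = {0.0};
$     Mat            mat;
$     PetscErrorCode ierr;
$
$     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
$                                           di,dj,da,oi,oj,oa,&mat);CHKERRQ(ierr);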
5161 5162 .keywords: matrix, aij, compressed row, sparse, parallel 5163 5164 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5165 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5166 @*/ 5167 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5168 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5169 { 5170 PetscErrorCode ierr; 5171 Mat_MPIAIJ *maij; 5172 5173 PetscFunctionBegin; 5174 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5175 if (i[0]) { 5176 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5177 } 5178 if (oi[0]) { 5179 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5180 } 5181 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5182 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5183 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5184 maij = (Mat_MPIAIJ*) (*mat)->data; 5185 maij->donotstash = PETSC_TRUE; 5186 (*mat)->preallocated = PETSC_TRUE; 5187 5188 ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5189 ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5190 ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr); 5191 ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr); 5192 5193 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5194 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5195 5196 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5197 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5198 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5199 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5200 5201 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5202 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5203 PetscFunctionReturn(0); 5204 } 5205 5206 /* 5207 Special version for direct calls from Fortran 5208 */ 5209 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5210 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5211 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5212 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5213 #endif 5214 5215 /* Change these macros so can be used in void function */ 5216 #undef CHKERRQ 5217 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5218 #undef SETERRQ2 5219 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5220 #undef SETERRQ 5221 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5222 5223 EXTERN_C_BEGIN 5224 #undef __FUNCT__ 5225 #define __FUNCT__ "matsetvaluesmpiaij_" 5226 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5227 { 5228 Mat mat = *mmat; 5229 PetscInt m = *mm, n = *mn; 5230 InsertMode addv = *maddv; 5231 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5232 PetscScalar value; 5233 PetscErrorCode ierr; 5234 5235 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5236 if (mat->insertmode == NOT_SET_VALUES) { 5237 mat->insertmode = addv; 5238 } 5239 #if defined(PETSC_USE_DEBUG) 5240 else if (mat->insertmode != addv) { 5241 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5242 } 5243 #endif 5244 { 5245 PetscInt i,j,rstart = mat->rmap->rstart,rend = 
mat->rmap->rend; 5246 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5247 PetscTruth roworiented = aij->roworiented; 5248 5249 /* Some Variables required in the macro */ 5250 Mat A = aij->A; 5251 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5252 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5253 MatScalar *aa = a->a; 5254 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5255 Mat B = aij->B; 5256 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5257 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5258 MatScalar *ba = b->a; 5259 5260 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5261 PetscInt nonew = a->nonew; 5262 MatScalar *ap1,*ap2; 5263 5264 PetscFunctionBegin; 5265 for (i=0; i<m; i++) { 5266 if (im[i] < 0) continue; 5267 #if defined(PETSC_USE_DEBUG) 5268 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5269 #endif 5270 if (im[i] >= rstart && im[i] < rend) { 5271 row = im[i] - rstart; 5272 lastcol1 = -1; 5273 rp1 = aj + ai[row]; 5274 ap1 = aa + ai[row]; 5275 rmax1 = aimax[row]; 5276 nrow1 = ailen[row]; 5277 low1 = 0; 5278 high1 = nrow1; 5279 lastcol2 = -1; 5280 rp2 = bj + bi[row]; 5281 ap2 = ba + bi[row]; 5282 rmax2 = bimax[row]; 5283 nrow2 = bilen[row]; 5284 low2 = 0; 5285 high2 = nrow2; 5286 5287 for (j=0; j<n; j++) { 5288 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5289 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5290 if (in[j] >= cstart && in[j] < cend){ 5291 col = in[j] - cstart; 5292 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5293 } else if (in[j] < 0) continue; 5294 #if defined(PETSC_USE_DEBUG) 5295 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5296 #endif 5297 else { 5298 if (mat->was_assembled) { 5299 if (!aij->colmap) { 5300 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5301 } 5302 #if defined (PETSC_USE_CTABLE) 5303 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5304 col--; 5305 #else 5306 col = aij->colmap[in[j]] - 1; 5307 #endif 5308 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5309 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5310 col = in[j]; 5311 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5312 B = aij->B; 5313 b = (Mat_SeqAIJ*)B->data; 5314 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5315 rp2 = bj + bi[row]; 5316 ap2 = ba + bi[row]; 5317 rmax2 = bimax[row]; 5318 nrow2 = bilen[row]; 5319 low2 = 0; 5320 high2 = nrow2; 5321 bm = aij->B->rmap->n; 5322 ba = b->a; 5323 } 5324 } else col = in[j]; 5325 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5326 } 5327 } 5328 } else { 5329 if (!aij->donotstash) { 5330 if (roworiented) { 5331 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5332 } else { 5333 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5334 } 5335 } 5336 } 5337 }} 5338 PetscFunctionReturnVoid(); 5339 } 5340 EXTERN_C_END 5341 5342
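/*
   For reference, a typical assembly sequence for a MATMPIAIJ matrix (a sketch of standard
   PETSc usage rather than code from this file; the global size, the preallocation counts,
   and the inserted values are illustrative assumptions):

      Mat            A;
      PetscInt       i,Istart,Iend;
      PetscScalar    one = 1.0;
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
      for (i=Istart; i<Iend; i++) {
        ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/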