1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatDistribute_MPIAIJ" 7 /* 8 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 9 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 10 11 Only for square matrices 12 */ 13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 14 { 15 PetscMPIInt rank,size; 16 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 17 PetscErrorCode ierr; 18 Mat mat; 19 Mat_SeqAIJ *gmata; 20 PetscMPIInt tag; 21 MPI_Status status; 22 PetscTruth aij; 23 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 24 25 PetscFunctionBegin; 26 CHKMEMQ; 27 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 28 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 29 if (!rank) { 30 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 31 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 32 } 33 if (reuse == MAT_INITIAL_MATRIX) { 34 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 35 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 36 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 37 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 38 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 39 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 40 rowners[0] = 0; 41 for (i=2; i<=size; i++) { 42 rowners[i] += rowners[i-1]; 43 } 44 rstart = rowners[rank]; 45 rend = rowners[rank+1]; 46 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 47 if (!rank) { 48 gmata = (Mat_SeqAIJ*) gmat->data; 49 /* send row lengths to all processors */ 50 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 51 for (i=1; i<size; i++) { 52 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 53 } 54 /* determine number diagonal and off-diagonal counts */ 55 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 56 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 57 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 58 jj = 0; 59 for (i=0; i<m; i++) { 60 for (j=0; j<dlens[i]; j++) { 61 if (gmata->j[jj] < rstart) ld[i]++; 62 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 63 jj++; 64 } 65 } 66 /* send column indices to other processes */ 67 for (i=1; i<size; i++) { 68 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 69 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 70 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 } 72 73 /* send numerical values to other processes */ 74 for (i=1; i<size; i++) { 75 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 76 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 77 } 78 gmataa = gmata->a; 79 gmataj = gmata->j; 80 81 } else { 82 /* receive row lengths */ 83 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 84 /* receive column indices */ 85 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 86 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 87 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 88 /* determine number diagonal and off-diagonal counts */ 89 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 90 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 91 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 92 jj = 0; 93 for (i=0; i<m; i++) { 94 for (j=0; j<dlens[i]; j++) { 95 if (gmataj[jj] < rstart) ld[i]++; 96 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 97 jj++; 98 } 99 } 100 /* receive numerical values */ 101 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 103 } 104 /* set preallocation */ 105 for (i=0; i<m; i++) { 106 dlens[i] -= olens[i]; 107 } 108 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 109 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 110 111 for (i=0; i<m; i++) { 112 dlens[i] += olens[i]; 113 } 114 cnt = 0; 115 for (i=0; i<m; i++) { 116 row = rstart + i; 117 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 118 cnt += dlens[i]; 119 } 120 if (rank) { 121 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 122 } 123 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 124 ierr = PetscFree(rowners);CHKERRQ(ierr); 125 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 126 *inmat = mat; 127 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 128 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 129 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 130 mat = *inmat; 131 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 132 if (!rank) { 133 /* send numerical values to other processes */ 134 gmata = (Mat_SeqAIJ*) gmat->data; 135 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 136 gmataa = gmata->a; 137 for (i=1; i<size; i++) { 138 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 139 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 140 } 141 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 142 } else { 143 /* receive numerical values from process 0*/ 144 nz = Ad->nz + Ao->nz; 145 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 146 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 147 } 148 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 149 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 150 ad = Ad->a; 151 ao = Ao->a; 152 if (mat->rmap->n) { 153 i = 0; 154 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 155 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 156 } 157 for (i=1; i<mat->rmap->n; i++) { 158 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 159 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 160 } 161 i--; 162 if (mat->rmap->n) { 163 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 164 } 165 if (rank) { 166 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 167 } 168 } 169 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 170 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 CHKMEMQ; 172 PetscFunctionReturn(0); 173 } 174 175 /* 176 Local utility routine that creates a mapping from the global column 177 number to the local number in the off-diagonal part of the local 178 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 179 a slightly higher hash table cost; without it it is not scalable (each processor 180 has an order N integer array but is fast to acess. 181 */ 182 #undef __FUNCT__ 183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 185 { 186 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 187 PetscErrorCode ierr; 188 PetscInt n = aij->B->cmap->n,i; 189 190 PetscFunctionBegin; 191 #if defined (PETSC_USE_CTABLE) 192 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 193 for (i=0; i<n; i++){ 194 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 195 } 196 #else 197 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 198 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 199 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 200 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 201 #endif 202 PetscFunctionReturn(0); 203 } 204 205 206 #define CHUNKSIZE 15 207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 208 { \ 209 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 210 lastcol1 = col;\ 211 while (high1-low1 > 5) { \ 212 t = (low1+high1)/2; \ 213 if (rp1[t] > col) high1 = t; \ 214 else low1 = t; \ 215 } \ 216 for (_i=low1; _i<high1; _i++) { \ 217 if (rp1[_i] > col) break; \ 218 if (rp1[_i] == col) { \ 219 if (addv == ADD_VALUES) ap1[_i] += value; \ 220 else ap1[_i] = value; \ 221 goto a_noinsert; \ 222 } \ 223 } \ 224 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 225 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 226 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 227 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 228 N = nrow1++ - 1; a->nz++; high1++; \ 229 /* shift up all the later entries in this row */ \ 230 for (ii=N; ii>=_i; ii--) { \ 231 rp1[ii+1] = rp1[ii]; \ 232 ap1[ii+1] = ap1[ii]; \ 233 } \ 234 rp1[_i] = col; \ 235 ap1[_i] = value; \ 236 a_noinsert: ; \ 237 ailen[row] = nrow1; \ 238 } 239 240 241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 242 { \ 243 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 244 lastcol2 = col;\ 245 while (high2-low2 > 5) { \ 246 t = (low2+high2)/2; \ 247 if (rp2[t] > col) high2 = t; \ 248 else low2 = t; \ 249 } \ 250 for (_i=low2; _i<high2; _i++) { \ 251 if (rp2[_i] > col) break; \ 252 if (rp2[_i] == col) { \ 253 if (addv == ADD_VALUES) ap2[_i] += value; \ 254 else ap2[_i] = value; \ 255 goto b_noinsert; \ 256 } \ 257 } \ 258 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 259 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 260 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 261 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 262 N = nrow2++ - 1; b->nz++; high2++; \ 263 /* shift up all the later entries in this row */ \ 264 for (ii=N; ii>=_i; ii--) { \ 265 rp2[ii+1] = rp2[ii]; \ 266 ap2[ii+1] = ap2[ii]; \ 267 } \ 268 rp2[_i] = col; \ 269 ap2[_i] = value; \ 270 b_noinsert: ; \ 271 bilen[row] = nrow2; \ 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 277 { 278 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 279 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 280 PetscErrorCode ierr; 281 PetscInt l,*garray = mat->garray,diag; 282 283 PetscFunctionBegin; 284 /* code only works for square matrices A */ 285 286 /* find size of row to the left of the diagonal part */ 287 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 288 row = row - diag; 289 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 290 if (garray[b->j[b->i[row]+l]] > diag) break; 291 } 292 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 293 294 /* diagonal part */ 295 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 296 297 /* right of diagonal part */ 298 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 #undef __FUNCT__ 303 #define __FUNCT__ "MatSetValues_MPIAIJ" 304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 305 { 306 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 307 PetscScalar value; 308 PetscErrorCode ierr; 309 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 310 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 311 PetscTruth roworiented = aij->roworiented; 312 313 /* Some Variables required in the macro */ 314 Mat A = aij->A; 315 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 316 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 317 MatScalar *aa = a->a; 318 PetscTruth ignorezeroentries = a->ignorezeroentries; 319 Mat B = aij->B; 320 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 321 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 322 MatScalar *ba = b->a; 323 324 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 325 PetscInt nonew = a->nonew; 326 MatScalar *ap1,*ap2; 327 328 PetscFunctionBegin; 329 if (v) PetscValidScalarPointer(v,6); 330 for (i=0; i<m; i++) { 331 if (im[i] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 334 #endif 335 if (im[i] >= rstart && im[i] < rend) { 336 row = im[i] - rstart; 337 lastcol1 = -1; 338 rp1 = aj + ai[row]; 339 ap1 = aa + ai[row]; 340 rmax1 = aimax[row]; 341 nrow1 = ailen[row]; 342 low1 = 0; 343 high1 = nrow1; 344 lastcol2 = -1; 345 rp2 = bj + bi[row]; 346 ap2 = ba + bi[row]; 347 rmax2 = bimax[row]; 348 nrow2 = bilen[row]; 349 low2 = 0; 350 high2 = nrow2; 351 352 for (j=0; j<n; j++) { 353 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 354 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 355 if (in[j] >= cstart && in[j] < cend){ 356 col = in[j] - cstart; 357 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 358 } else if (in[j] < 0) continue; 359 #if defined(PETSC_USE_DEBUG) 360 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 361 #endif 362 else { 363 if (mat->was_assembled) { 364 if (!aij->colmap) { 365 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 366 } 367 #if defined (PETSC_USE_CTABLE) 368 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 369 col--; 370 #else 371 col = aij->colmap[in[j]] - 1; 372 #endif 373 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 374 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 375 col = in[j]; 376 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 377 B = aij->B; 378 b = (Mat_SeqAIJ*)B->data; 379 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 380 rp2 = bj + bi[row]; 381 ap2 = ba + bi[row]; 382 rmax2 = bimax[row]; 383 nrow2 = bilen[row]; 384 low2 = 0; 385 high2 = nrow2; 386 bm = aij->B->rmap->n; 387 ba = b->a; 388 } 389 } else col = in[j]; 390 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 391 } 392 } 393 } else { 394 if (!aij->donotstash) { 395 if (roworiented) { 396 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 397 } else { 398 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 399 } 400 } 401 } 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatGetValues_MPIAIJ" 408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 413 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 414 415 PetscFunctionBegin; 416 for (i=0; i<m; i++) { 417 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 418 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 419 if (idxm[i] >= rstart && idxm[i] < rend) { 420 row = idxm[i] - rstart; 421 for (j=0; j<n; j++) { 422 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 423 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 424 if (idxn[j] >= cstart && idxn[j] < cend){ 425 col = idxn[j] - cstart; 426 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 427 } else { 428 if (!aij->colmap) { 429 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 430 } 431 #if defined (PETSC_USE_CTABLE) 432 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 433 col --; 434 #else 435 col = aij->colmap[idxn[j]] - 1; 436 #endif 437 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 438 else { 439 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 440 } 441 } 442 } 443 } else { 444 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 445 } 446 } 447 PetscFunctionReturn(0); 448 } 449 450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash) { 463 PetscFunctionReturn(0); 464 } 465 466 /* make sure all processors are either in INSERTMODE or ADDMODE */ 467 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 468 if (addv == (ADD_VALUES|INSERT_VALUES)) { 469 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 470 } 471 mat->insertmode = addv; /* in case this processor had no cache */ 472 473 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 474 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 475 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 485 PetscErrorCode ierr; 486 PetscMPIInt n; 487 PetscInt i,j,rstart,ncols,flg; 488 PetscInt *row,*col; 489 PetscTruth other_disassembled; 490 PetscScalar *val; 491 InsertMode addv = mat->insertmode; 492 493 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 494 PetscFunctionBegin; 495 if (!aij->donotstash) { 496 while (1) { 497 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 498 if (!flg) break; 499 500 for (i=0; i<n;) { 501 /* Now identify the consecutive vals belonging to the same row */ 502 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 503 if (j < n) ncols = j-i; 504 else ncols = n-i; 505 /* Now assemble all these values with a single function call */ 506 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 507 i = j; 508 } 509 } 510 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 511 } 512 a->compressedrow.use = PETSC_FALSE; 513 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 514 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 515 516 /* determine if any processor has disassembled, if so we must 517 also disassemble ourselfs, in order that we may reassemble. */ 518 /* 519 if nonzero structure of submatrix B cannot change then we know that 520 no processor disassembled thus we can skip this stuff 521 */ 522 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 523 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 524 if (mat->was_assembled && !other_disassembled) { 525 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 } 527 } 528 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 529 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 530 } 531 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 532 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 533 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 534 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 535 536 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 537 aij->rowvalues = 0; 538 539 /* used by MatAXPY() */ 540 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 541 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 542 543 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 544 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 545 PetscFunctionReturn(0); 546 } 547 548 #undef __FUNCT__ 549 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 550 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 551 { 552 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 553 PetscErrorCode ierr; 554 555 PetscFunctionBegin; 556 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 557 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatZeroRows_MPIAIJ" 563 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 564 { 565 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 566 PetscErrorCode ierr; 567 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 568 PetscInt i,*owners = A->rmap->range; 569 PetscInt *nprocs,j,idx,nsends,row; 570 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 571 PetscInt *rvalues,count,base,slen,*source; 572 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 573 MPI_Comm comm = ((PetscObject)A)->comm; 574 MPI_Request *send_waits,*recv_waits; 575 MPI_Status recv_status,*send_status; 576 #if defined(PETSC_DEBUG) 577 PetscTruth found = PETSC_FALSE; 578 #endif 579 580 PetscFunctionBegin; 581 /* first count number of contributors to each processor */ 582 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 583 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 584 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 585 j = 0; 586 for (i=0; i<N; i++) { 587 if (lastidx > (idx = rows[i])) j = 0; 588 lastidx = idx; 589 for (; j<size; j++) { 590 if (idx >= owners[j] && idx < owners[j+1]) { 591 nprocs[2*j]++; 592 nprocs[2*j+1] = 1; 593 owner[i] = j; 594 #if defined(PETSC_DEBUG) 595 found = PETSC_TRUE; 596 #endif 597 break; 598 } 599 } 600 #if defined(PETSC_DEBUG) 601 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 602 found = PETSC_FALSE; 603 #endif 604 } 605 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 606 607 /* inform other processors of number of messages and max length*/ 608 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 609 610 /* post receives: */ 611 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 612 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 613 for (i=0; i<nrecvs; i++) { 614 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 615 } 616 617 /* do sends: 618 1) starts[i] gives the starting index in svalues for stuff going to 619 the ith processor 620 */ 621 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 622 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 623 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 624 starts[0] = 0; 625 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 626 for (i=0; i<N; i++) { 627 svalues[starts[owner[i]]++] = rows[i]; 628 } 629 630 starts[0] = 0; 631 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 632 count = 0; 633 for (i=0; i<size; i++) { 634 if (nprocs[2*i+1]) { 635 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 636 } 637 } 638 ierr = PetscFree(starts);CHKERRQ(ierr); 639 640 base = owners[rank]; 641 642 /* wait on receives */ 643 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 644 source = lens + nrecvs; 645 count = nrecvs; slen = 0; 646 while (count) { 647 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 648 /* unpack receives into our local space */ 649 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 650 source[imdex] = recv_status.MPI_SOURCE; 651 lens[imdex] = n; 652 slen += n; 653 count--; 654 } 655 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 656 657 /* move the data into the send scatter */ 658 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 659 count = 0; 660 for (i=0; i<nrecvs; i++) { 661 values = rvalues + i*nmax; 662 for (j=0; j<lens[i]; j++) { 663 lrows[count++] = values[j] - base; 664 } 665 } 666 ierr = PetscFree(rvalues);CHKERRQ(ierr); 667 ierr = PetscFree(lens);CHKERRQ(ierr); 668 ierr = PetscFree(owner);CHKERRQ(ierr); 669 ierr = PetscFree(nprocs);CHKERRQ(ierr); 670 671 /* actually zap the local rows */ 672 /* 673 Zero the required rows. If the "diagonal block" of the matrix 674 is square and the user wishes to set the diagonal we use separate 675 code so that MatSetValues() is not called for each diagonal allocating 676 new memory, thus calling lots of mallocs and slowing things down. 677 678 */ 679 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 680 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 681 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 682 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 683 } else if (diag != 0.0) { 684 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 685 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 686 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 687 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 688 } 689 for (i = 0; i < slen; i++) { 690 row = lrows[i] + rstart; 691 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 692 } 693 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 695 } else { 696 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 697 } 698 ierr = PetscFree(lrows);CHKERRQ(ierr); 699 700 /* wait on sends */ 701 if (nsends) { 702 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 703 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 704 ierr = PetscFree(send_status);CHKERRQ(ierr); 705 } 706 ierr = PetscFree(send_waits);CHKERRQ(ierr); 707 ierr = PetscFree(svalues);CHKERRQ(ierr); 708 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatMult_MPIAIJ" 714 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 715 { 716 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 717 PetscErrorCode ierr; 718 PetscInt nt; 719 720 PetscFunctionBegin; 721 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 722 if (nt != A->cmap->n) { 723 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 724 } 725 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 726 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 727 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 728 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 734 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 735 { 736 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 737 PetscErrorCode ierr; 738 739 PetscFunctionBegin; 740 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatMultAdd_MPIAIJ" 746 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 747 { 748 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 749 PetscErrorCode ierr; 750 751 PetscFunctionBegin; 752 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 753 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 754 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 755 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 756 PetscFunctionReturn(0); 757 } 758 759 #undef __FUNCT__ 760 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 761 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 762 { 763 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 764 PetscErrorCode ierr; 765 PetscTruth merged; 766 767 PetscFunctionBegin; 768 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 769 /* do nondiagonal part */ 770 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 771 if (!merged) { 772 /* send it on its way */ 773 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 774 /* do local part */ 775 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 776 /* receive remote parts: note this assumes the values are not actually */ 777 /* added in yy until the next line, */ 778 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 779 } else { 780 /* do local part */ 781 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 782 /* send it on its way */ 783 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 784 /* values actually were received in the Begin() but we need to call this nop */ 785 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 786 } 787 PetscFunctionReturn(0); 788 } 789 790 EXTERN_C_BEGIN 791 #undef __FUNCT__ 792 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 793 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 794 { 795 MPI_Comm comm; 796 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 797 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 798 IS Me,Notme; 799 PetscErrorCode ierr; 800 PetscInt M,N,first,last,*notme,i; 801 PetscMPIInt size; 802 803 PetscFunctionBegin; 804 805 /* Easy test: symmetric diagonal block */ 806 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 807 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 808 if (!*f) PetscFunctionReturn(0); 809 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 810 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 811 if (size == 1) PetscFunctionReturn(0); 812 813 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 814 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 815 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 816 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 817 for (i=0; i<first; i++) notme[i] = i; 818 for (i=last; i<M; i++) notme[i-last+first] = i; 819 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 820 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 821 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 822 Aoff = Aoffs[0]; 823 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 824 Boff = Boffs[0]; 825 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 826 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 827 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 828 ierr = ISDestroy(Me);CHKERRQ(ierr); 829 ierr = ISDestroy(Notme);CHKERRQ(ierr); 830 831 PetscFunctionReturn(0); 832 } 833 EXTERN_C_END 834 835 #undef __FUNCT__ 836 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 837 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 838 { 839 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 840 PetscErrorCode ierr; 841 842 PetscFunctionBegin; 843 /* do nondiagonal part */ 844 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 845 /* send it on its way */ 846 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 847 /* do local part */ 848 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 849 /* receive remote parts */ 850 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 851 PetscFunctionReturn(0); 852 } 853 854 /* 855 This only works correctly for square matrices where the subblock A->A is the 856 diagonal block 857 */ 858 #undef __FUNCT__ 859 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 860 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 861 { 862 PetscErrorCode ierr; 863 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 864 865 PetscFunctionBegin; 866 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 867 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) { 868 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 869 } 870 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatScale_MPIAIJ" 876 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 877 { 878 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 879 PetscErrorCode ierr; 880 881 PetscFunctionBegin; 882 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 883 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 884 PetscFunctionReturn(0); 885 } 886 887 #undef __FUNCT__ 888 #define __FUNCT__ "MatDestroy_MPIAIJ" 889 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 890 { 891 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 892 PetscErrorCode ierr; 893 894 PetscFunctionBegin; 895 #if defined(PETSC_USE_LOG) 896 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 897 #endif 898 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 899 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 900 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 901 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 902 #if defined (PETSC_USE_CTABLE) 903 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 904 #else 905 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 906 #endif 907 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 908 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 909 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 910 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 911 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 912 ierr = PetscFree(aij);CHKERRQ(ierr); 913 914 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 915 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 916 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 917 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 918 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 919 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 920 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 921 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 922 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 923 PetscFunctionReturn(0); 924 } 925 926 #undef __FUNCT__ 927 #define __FUNCT__ "MatView_MPIAIJ_Binary" 928 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 929 { 930 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 931 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 932 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 933 PetscErrorCode ierr; 934 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 935 int fd; 936 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 937 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 938 PetscScalar *column_values; 939 940 PetscFunctionBegin; 941 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 942 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 943 nz = A->nz + B->nz; 944 if (!rank) { 945 header[0] = MAT_FILE_COOKIE; 946 header[1] = mat->rmap->N; 947 header[2] = mat->cmap->N; 948 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 949 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 950 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 951 /* get largest number of rows any processor has */ 952 rlen = mat->rmap->n; 953 range = mat->rmap->range; 954 for (i=1; i<size; i++) { 955 rlen = PetscMax(rlen,range[i+1] - range[i]); 956 } 957 } else { 958 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 959 rlen = mat->rmap->n; 960 } 961 962 /* load up the local row counts */ 963 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 964 for (i=0; i<mat->rmap->n; i++) { 965 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 966 } 967 968 /* store the row lengths to the file */ 969 if (!rank) { 970 MPI_Status status; 971 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 972 for (i=1; i<size; i++) { 973 rlen = range[i+1] - range[i]; 974 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 975 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 976 } 977 } else { 978 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 979 } 980 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 981 982 /* load up the local column indices */ 983 nzmax = nz; /* )th processor needs space a largest processor needs */ 984 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 985 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 986 cnt = 0; 987 for (i=0; i<mat->rmap->n; i++) { 988 for (j=B->i[i]; j<B->i[i+1]; j++) { 989 if ( (col = garray[B->j[j]]) > cstart) break; 990 column_indices[cnt++] = col; 991 } 992 for (k=A->i[i]; k<A->i[i+1]; k++) { 993 column_indices[cnt++] = A->j[k] + cstart; 994 } 995 for (; j<B->i[i+1]; j++) { 996 column_indices[cnt++] = garray[B->j[j]]; 997 } 998 } 999 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1000 1001 /* store the column indices to the file */ 1002 if (!rank) { 1003 MPI_Status status; 1004 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1005 for (i=1; i<size; i++) { 1006 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1007 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1008 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1009 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1010 } 1011 } else { 1012 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1013 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1014 } 1015 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1016 1017 /* load up the local column values */ 1018 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1019 cnt = 0; 1020 for (i=0; i<mat->rmap->n; i++) { 1021 for (j=B->i[i]; j<B->i[i+1]; j++) { 1022 if ( garray[B->j[j]] > cstart) break; 1023 column_values[cnt++] = B->a[j]; 1024 } 1025 for (k=A->i[i]; k<A->i[i+1]; k++) { 1026 column_values[cnt++] = A->a[k]; 1027 } 1028 for (; j<B->i[i+1]; j++) { 1029 column_values[cnt++] = B->a[j]; 1030 } 1031 } 1032 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1033 1034 /* store the column values to the file */ 1035 if (!rank) { 1036 MPI_Status status; 1037 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1038 for (i=1; i<size; i++) { 1039 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1040 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1041 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1042 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1043 } 1044 } else { 1045 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1046 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1047 } 1048 ierr = PetscFree(column_values);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 #undef __FUNCT__ 1053 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1054 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1055 { 1056 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1057 PetscErrorCode ierr; 1058 PetscMPIInt rank = aij->rank,size = aij->size; 1059 PetscTruth isdraw,iascii,isbinary; 1060 PetscViewer sviewer; 1061 PetscViewerFormat format; 1062 1063 PetscFunctionBegin; 1064 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1065 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1066 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1067 if (iascii) { 1068 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1069 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1070 MatInfo info; 1071 PetscTruth inodes; 1072 1073 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1074 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1075 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1076 if (!inodes) { 1077 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1078 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1079 } else { 1080 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1081 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1082 } 1083 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1084 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1085 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1086 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1087 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1088 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1089 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1090 PetscFunctionReturn(0); 1091 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1092 PetscInt inodecount,inodelimit,*inodes; 1093 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1094 if (inodes) { 1095 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1096 } else { 1097 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1098 } 1099 PetscFunctionReturn(0); 1100 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1101 PetscFunctionReturn(0); 1102 } 1103 } else if (isbinary) { 1104 if (size == 1) { 1105 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1106 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1107 } else { 1108 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1109 } 1110 PetscFunctionReturn(0); 1111 } else if (isdraw) { 1112 PetscDraw draw; 1113 PetscTruth isnull; 1114 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1115 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1116 } 1117 1118 if (size == 1) { 1119 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1120 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1121 } else { 1122 /* assemble the entire matrix onto first processor. */ 1123 Mat A; 1124 Mat_SeqAIJ *Aloc; 1125 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1126 MatScalar *a; 1127 1128 if (mat->rmap->N > 1024) { 1129 PetscTruth flg = PETSC_FALSE; 1130 1131 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1132 if (!flg) { 1133 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1134 } 1135 } 1136 1137 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1138 if (!rank) { 1139 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1140 } else { 1141 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1142 } 1143 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1144 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1145 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1146 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1147 1148 /* copy over the A part */ 1149 Aloc = (Mat_SeqAIJ*)aij->A->data; 1150 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1151 row = mat->rmap->rstart; 1152 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1153 for (i=0; i<m; i++) { 1154 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1155 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1156 } 1157 aj = Aloc->j; 1158 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1159 1160 /* copy over the B part */ 1161 Aloc = (Mat_SeqAIJ*)aij->B->data; 1162 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1163 row = mat->rmap->rstart; 1164 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1165 ct = cols; 1166 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1167 for (i=0; i<m; i++) { 1168 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1169 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1170 } 1171 ierr = PetscFree(ct);CHKERRQ(ierr); 1172 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1173 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1174 /* 1175 Everyone has to call to draw the matrix since the graphics waits are 1176 synchronized across all processors that share the PetscDraw object 1177 */ 1178 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1179 if (!rank) { 1180 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1181 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1182 } 1183 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1184 ierr = MatDestroy(A);CHKERRQ(ierr); 1185 } 1186 PetscFunctionReturn(0); 1187 } 1188 1189 #undef __FUNCT__ 1190 #define __FUNCT__ "MatView_MPIAIJ" 1191 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1192 { 1193 PetscErrorCode ierr; 1194 PetscTruth iascii,isdraw,issocket,isbinary; 1195 1196 PetscFunctionBegin; 1197 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1198 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1199 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1200 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1201 if (iascii || isdraw || isbinary || issocket) { 1202 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1203 } else { 1204 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1205 } 1206 PetscFunctionReturn(0); 1207 } 1208 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "MatSOR_MPIAIJ" 1211 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1212 { 1213 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1214 PetscErrorCode ierr; 1215 Vec bb1 = 0; 1216 PetscTruth hasop; 1217 1218 PetscFunctionBegin; 1219 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1220 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1221 } 1222 1223 if (flag == SOR_APPLY_UPPER) { 1224 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1229 if (flag & SOR_ZERO_INITIAL_GUESS) { 1230 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1231 its--; 1232 } 1233 1234 while (its--) { 1235 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1236 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1237 1238 /* update rhs: bb1 = bb - B*x */ 1239 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1240 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1241 1242 /* local sweep */ 1243 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1244 } 1245 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1246 if (flag & SOR_ZERO_INITIAL_GUESS) { 1247 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1248 its--; 1249 } 1250 while (its--) { 1251 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1252 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1253 1254 /* update rhs: bb1 = bb - B*x */ 1255 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1256 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1257 1258 /* local sweep */ 1259 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1260 } 1261 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1262 if (flag & SOR_ZERO_INITIAL_GUESS) { 1263 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1264 its--; 1265 } 1266 while (its--) { 1267 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1268 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 1270 /* update rhs: bb1 = bb - B*x */ 1271 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1272 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1273 1274 /* local sweep */ 1275 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1276 } 1277 } else if (flag & SOR_EISENSTAT) { 1278 Vec xx1; 1279 1280 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1281 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1282 1283 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1284 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1285 if (!mat->diag) { 1286 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1287 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1288 } 1289 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1290 if (hasop) { 1291 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1292 } else { 1293 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1294 } 1295 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1296 1297 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1298 1299 /* local sweep */ 1300 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1301 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1302 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1303 } else { 1304 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1305 } 1306 1307 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatPermute_MPIAIJ" 1313 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1314 { 1315 MPI_Comm comm,pcomm; 1316 PetscInt first,local_size,nrows; 1317 const PetscInt *rows; 1318 PetscMPIInt size; 1319 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1320 PetscErrorCode ierr; 1321 1322 PetscFunctionBegin; 1323 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1324 /* make a collective version of 'rowp' */ 1325 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1326 if (pcomm==comm) { 1327 crowp = rowp; 1328 } else { 1329 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1330 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1331 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1332 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1333 } 1334 /* collect the global row permutation and invert it */ 1335 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1336 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1337 if (pcomm!=comm) { 1338 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1339 } 1340 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1341 /* get the local target indices */ 1342 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1343 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1344 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1345 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1346 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1347 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1348 /* the column permutation is so much easier; 1349 make a local version of 'colp' and invert it */ 1350 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1351 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1352 if (size==1) { 1353 lcolp = colp; 1354 } else { 1355 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1356 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1357 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1358 } 1359 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1360 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1361 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1362 if (size>1) { 1363 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1364 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1365 } 1366 /* now we just get the submatrix */ 1367 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1368 /* clean up */ 1369 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1370 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1376 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1377 { 1378 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1379 Mat A = mat->A,B = mat->B; 1380 PetscErrorCode ierr; 1381 PetscReal isend[5],irecv[5]; 1382 1383 PetscFunctionBegin; 1384 info->block_size = 1.0; 1385 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1386 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1387 isend[3] = info->memory; isend[4] = info->mallocs; 1388 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1389 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1390 isend[3] += info->memory; isend[4] += info->mallocs; 1391 if (flag == MAT_LOCAL) { 1392 info->nz_used = isend[0]; 1393 info->nz_allocated = isend[1]; 1394 info->nz_unneeded = isend[2]; 1395 info->memory = isend[3]; 1396 info->mallocs = isend[4]; 1397 } else if (flag == MAT_GLOBAL_MAX) { 1398 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1399 info->nz_used = irecv[0]; 1400 info->nz_allocated = irecv[1]; 1401 info->nz_unneeded = irecv[2]; 1402 info->memory = irecv[3]; 1403 info->mallocs = irecv[4]; 1404 } else if (flag == MAT_GLOBAL_SUM) { 1405 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1406 info->nz_used = irecv[0]; 1407 info->nz_allocated = irecv[1]; 1408 info->nz_unneeded = irecv[2]; 1409 info->memory = irecv[3]; 1410 info->mallocs = irecv[4]; 1411 } 1412 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1413 info->fill_ratio_needed = 0; 1414 info->factor_mallocs = 0; 1415 1416 PetscFunctionReturn(0); 1417 } 1418 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "MatSetOption_MPIAIJ" 1421 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1422 { 1423 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1424 PetscErrorCode ierr; 1425 1426 PetscFunctionBegin; 1427 switch (op) { 1428 case MAT_NEW_NONZERO_LOCATIONS: 1429 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1430 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1431 case MAT_KEEP_NONZERO_PATTERN: 1432 case MAT_NEW_NONZERO_LOCATION_ERR: 1433 case MAT_USE_INODES: 1434 case MAT_IGNORE_ZERO_ENTRIES: 1435 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1436 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1437 break; 1438 case MAT_ROW_ORIENTED: 1439 a->roworiented = flg; 1440 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1441 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1442 break; 1443 case MAT_NEW_DIAGONALS: 1444 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1445 break; 1446 case MAT_IGNORE_OFF_PROC_ENTRIES: 1447 a->donotstash = PETSC_TRUE; 1448 break; 1449 case MAT_SYMMETRIC: 1450 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1451 break; 1452 case MAT_STRUCTURALLY_SYMMETRIC: 1453 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1454 break; 1455 case MAT_HERMITIAN: 1456 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1457 break; 1458 case MAT_SYMMETRY_ETERNAL: 1459 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1460 break; 1461 default: 1462 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1463 } 1464 PetscFunctionReturn(0); 1465 } 1466 1467 #undef __FUNCT__ 1468 #define __FUNCT__ "MatGetRow_MPIAIJ" 1469 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1470 { 1471 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1472 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1473 PetscErrorCode ierr; 1474 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1475 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1476 PetscInt *cmap,*idx_p; 1477 1478 PetscFunctionBegin; 1479 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1480 mat->getrowactive = PETSC_TRUE; 1481 1482 if (!mat->rowvalues && (idx || v)) { 1483 /* 1484 allocate enough space to hold information from the longest row. 1485 */ 1486 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1487 PetscInt max = 1,tmp; 1488 for (i=0; i<matin->rmap->n; i++) { 1489 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1490 if (max < tmp) { max = tmp; } 1491 } 1492 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1493 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1494 } 1495 1496 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1497 lrow = row - rstart; 1498 1499 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1500 if (!v) {pvA = 0; pvB = 0;} 1501 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1502 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1503 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1504 nztot = nzA + nzB; 1505 1506 cmap = mat->garray; 1507 if (v || idx) { 1508 if (nztot) { 1509 /* Sort by increasing column numbers, assuming A and B already sorted */ 1510 PetscInt imark = -1; 1511 if (v) { 1512 *v = v_p = mat->rowvalues; 1513 for (i=0; i<nzB; i++) { 1514 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1515 else break; 1516 } 1517 imark = i; 1518 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1519 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1520 } 1521 if (idx) { 1522 *idx = idx_p = mat->rowindices; 1523 if (imark > -1) { 1524 for (i=0; i<imark; i++) { 1525 idx_p[i] = cmap[cworkB[i]]; 1526 } 1527 } else { 1528 for (i=0; i<nzB; i++) { 1529 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1530 else break; 1531 } 1532 imark = i; 1533 } 1534 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1535 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1536 } 1537 } else { 1538 if (idx) *idx = 0; 1539 if (v) *v = 0; 1540 } 1541 } 1542 *nz = nztot; 1543 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1544 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1545 PetscFunctionReturn(0); 1546 } 1547 1548 #undef __FUNCT__ 1549 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1550 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1551 { 1552 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1553 1554 PetscFunctionBegin; 1555 if (!aij->getrowactive) { 1556 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1557 } 1558 aij->getrowactive = PETSC_FALSE; 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatNorm_MPIAIJ" 1564 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1565 { 1566 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1567 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1568 PetscErrorCode ierr; 1569 PetscInt i,j,cstart = mat->cmap->rstart; 1570 PetscReal sum = 0.0; 1571 MatScalar *v; 1572 1573 PetscFunctionBegin; 1574 if (aij->size == 1) { 1575 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1576 } else { 1577 if (type == NORM_FROBENIUS) { 1578 v = amat->a; 1579 for (i=0; i<amat->nz; i++) { 1580 #if defined(PETSC_USE_COMPLEX) 1581 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1582 #else 1583 sum += (*v)*(*v); v++; 1584 #endif 1585 } 1586 v = bmat->a; 1587 for (i=0; i<bmat->nz; i++) { 1588 #if defined(PETSC_USE_COMPLEX) 1589 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1590 #else 1591 sum += (*v)*(*v); v++; 1592 #endif 1593 } 1594 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1595 *norm = sqrt(*norm); 1596 } else if (type == NORM_1) { /* max column norm */ 1597 PetscReal *tmp,*tmp2; 1598 PetscInt *jj,*garray = aij->garray; 1599 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1600 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1601 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1602 *norm = 0.0; 1603 v = amat->a; jj = amat->j; 1604 for (j=0; j<amat->nz; j++) { 1605 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1606 } 1607 v = bmat->a; jj = bmat->j; 1608 for (j=0; j<bmat->nz; j++) { 1609 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1610 } 1611 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1612 for (j=0; j<mat->cmap->N; j++) { 1613 if (tmp2[j] > *norm) *norm = tmp2[j]; 1614 } 1615 ierr = PetscFree(tmp);CHKERRQ(ierr); 1616 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1617 } else if (type == NORM_INFINITY) { /* max row norm */ 1618 PetscReal ntemp = 0.0; 1619 for (j=0; j<aij->A->rmap->n; j++) { 1620 v = amat->a + amat->i[j]; 1621 sum = 0.0; 1622 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1623 sum += PetscAbsScalar(*v); v++; 1624 } 1625 v = bmat->a + bmat->i[j]; 1626 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1627 sum += PetscAbsScalar(*v); v++; 1628 } 1629 if (sum > ntemp) ntemp = sum; 1630 } 1631 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1632 } else { 1633 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1634 } 1635 } 1636 PetscFunctionReturn(0); 1637 } 1638 1639 #undef __FUNCT__ 1640 #define __FUNCT__ "MatTranspose_MPIAIJ" 1641 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1642 { 1643 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1644 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1645 PetscErrorCode ierr; 1646 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1647 PetscInt cstart=A->cmap->rstart,ncol; 1648 Mat B; 1649 MatScalar *array; 1650 1651 PetscFunctionBegin; 1652 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1653 1654 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1655 ai = Aloc->i; aj = Aloc->j; 1656 bi = Bloc->i; bj = Bloc->j; 1657 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1658 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1659 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1660 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1661 for (i=0; i<ai[ma]; i++){ 1662 d_nnz[aj[i]] ++; 1663 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1664 } 1665 1666 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1667 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1668 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1669 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1670 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1671 } else { 1672 B = *matout; 1673 } 1674 1675 /* copy over the A part */ 1676 array = Aloc->a; 1677 row = A->rmap->rstart; 1678 for (i=0; i<ma; i++) { 1679 ncol = ai[i+1]-ai[i]; 1680 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1681 row++; array += ncol; aj += ncol; 1682 } 1683 aj = Aloc->j; 1684 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1685 1686 /* copy over the B part */ 1687 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1688 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1689 array = Bloc->a; 1690 row = A->rmap->rstart; 1691 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1692 cols_tmp = cols; 1693 for (i=0; i<mb; i++) { 1694 ncol = bi[i+1]-bi[i]; 1695 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1696 row++; array += ncol; cols_tmp += ncol; 1697 } 1698 ierr = PetscFree(cols);CHKERRQ(ierr); 1699 1700 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1701 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1702 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1703 *matout = B; 1704 } else { 1705 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1706 } 1707 PetscFunctionReturn(0); 1708 } 1709 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1712 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1713 { 1714 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1715 Mat a = aij->A,b = aij->B; 1716 PetscErrorCode ierr; 1717 PetscInt s1,s2,s3; 1718 1719 PetscFunctionBegin; 1720 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1721 if (rr) { 1722 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1723 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1724 /* Overlap communication with computation. */ 1725 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1726 } 1727 if (ll) { 1728 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1729 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1730 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1731 } 1732 /* scale the diagonal block */ 1733 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1734 1735 if (rr) { 1736 /* Do a scatter end and then right scale the off-diagonal block */ 1737 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1738 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1739 } 1740 1741 PetscFunctionReturn(0); 1742 } 1743 1744 #undef __FUNCT__ 1745 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1746 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1747 { 1748 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1749 PetscErrorCode ierr; 1750 1751 PetscFunctionBegin; 1752 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1753 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1754 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1755 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1756 PetscFunctionReturn(0); 1757 } 1758 #undef __FUNCT__ 1759 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1760 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1761 { 1762 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1763 PetscErrorCode ierr; 1764 1765 PetscFunctionBegin; 1766 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1767 PetscFunctionReturn(0); 1768 } 1769 1770 #undef __FUNCT__ 1771 #define __FUNCT__ "MatEqual_MPIAIJ" 1772 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1773 { 1774 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1775 Mat a,b,c,d; 1776 PetscTruth flg; 1777 PetscErrorCode ierr; 1778 1779 PetscFunctionBegin; 1780 a = matA->A; b = matA->B; 1781 c = matB->A; d = matB->B; 1782 1783 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1784 if (flg) { 1785 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1786 } 1787 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1788 PetscFunctionReturn(0); 1789 } 1790 1791 #undef __FUNCT__ 1792 #define __FUNCT__ "MatCopy_MPIAIJ" 1793 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1794 { 1795 PetscErrorCode ierr; 1796 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1797 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1798 1799 PetscFunctionBegin; 1800 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1801 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1802 /* because of the column compression in the off-processor part of the matrix a->B, 1803 the number of columns in a->B and b->B may be different, hence we cannot call 1804 the MatCopy() directly on the two parts. If need be, we can provide a more 1805 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1806 then copying the submatrices */ 1807 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1808 } else { 1809 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1810 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1811 } 1812 PetscFunctionReturn(0); 1813 } 1814 1815 #undef __FUNCT__ 1816 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1817 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1818 { 1819 PetscErrorCode ierr; 1820 1821 PetscFunctionBegin; 1822 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1823 PetscFunctionReturn(0); 1824 } 1825 1826 #include "petscblaslapack.h" 1827 #undef __FUNCT__ 1828 #define __FUNCT__ "MatAXPY_MPIAIJ" 1829 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1830 { 1831 PetscErrorCode ierr; 1832 PetscInt i; 1833 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1834 PetscBLASInt bnz,one=1; 1835 Mat_SeqAIJ *x,*y; 1836 1837 PetscFunctionBegin; 1838 if (str == SAME_NONZERO_PATTERN) { 1839 PetscScalar alpha = a; 1840 x = (Mat_SeqAIJ *)xx->A->data; 1841 y = (Mat_SeqAIJ *)yy->A->data; 1842 bnz = PetscBLASIntCast(x->nz); 1843 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1844 x = (Mat_SeqAIJ *)xx->B->data; 1845 y = (Mat_SeqAIJ *)yy->B->data; 1846 bnz = PetscBLASIntCast(x->nz); 1847 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1848 } else if (str == SUBSET_NONZERO_PATTERN) { 1849 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1850 1851 x = (Mat_SeqAIJ *)xx->B->data; 1852 y = (Mat_SeqAIJ *)yy->B->data; 1853 if (y->xtoy && y->XtoY != xx->B) { 1854 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1855 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1856 } 1857 if (!y->xtoy) { /* get xtoy */ 1858 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1859 y->XtoY = xx->B; 1860 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1861 } 1862 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1863 } else { 1864 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1865 } 1866 PetscFunctionReturn(0); 1867 } 1868 1869 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1870 1871 #undef __FUNCT__ 1872 #define __FUNCT__ "MatConjugate_MPIAIJ" 1873 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1874 { 1875 #if defined(PETSC_USE_COMPLEX) 1876 PetscErrorCode ierr; 1877 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1878 1879 PetscFunctionBegin; 1880 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1881 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1882 #else 1883 PetscFunctionBegin; 1884 #endif 1885 PetscFunctionReturn(0); 1886 } 1887 1888 #undef __FUNCT__ 1889 #define __FUNCT__ "MatRealPart_MPIAIJ" 1890 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1891 { 1892 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1893 PetscErrorCode ierr; 1894 1895 PetscFunctionBegin; 1896 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1897 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1898 PetscFunctionReturn(0); 1899 } 1900 1901 #undef __FUNCT__ 1902 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1903 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1904 { 1905 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1906 PetscErrorCode ierr; 1907 1908 PetscFunctionBegin; 1909 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1910 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #ifdef PETSC_HAVE_PBGL 1915 1916 #include <boost/parallel/mpi/bsp_process_group.hpp> 1917 #include <boost/graph/distributed/ilu_default_graph.hpp> 1918 #include <boost/graph/distributed/ilu_0_block.hpp> 1919 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1920 #include <boost/graph/distributed/petsc/interface.hpp> 1921 #include <boost/multi_array.hpp> 1922 #include <boost/parallel/distributed_property_map->hpp> 1923 1924 #undef __FUNCT__ 1925 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1926 /* 1927 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1928 */ 1929 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1930 { 1931 namespace petsc = boost::distributed::petsc; 1932 1933 namespace graph_dist = boost::graph::distributed; 1934 using boost::graph::distributed::ilu_default::process_group_type; 1935 using boost::graph::ilu_permuted; 1936 1937 PetscTruth row_identity, col_identity; 1938 PetscContainer c; 1939 PetscInt m, n, M, N; 1940 PetscErrorCode ierr; 1941 1942 PetscFunctionBegin; 1943 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1944 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1945 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1946 if (!row_identity || !col_identity) { 1947 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1948 } 1949 1950 process_group_type pg; 1951 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1952 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1953 lgraph_type& level_graph = *lgraph_p; 1954 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1955 1956 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1957 ilu_permuted(level_graph); 1958 1959 /* put together the new matrix */ 1960 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1961 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1962 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1963 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1964 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1965 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1966 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1967 1968 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1969 ierr = PetscContainerSetPointer(c, lgraph_p); 1970 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1971 PetscFunctionReturn(0); 1972 } 1973 1974 #undef __FUNCT__ 1975 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1976 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1977 { 1978 PetscFunctionBegin; 1979 PetscFunctionReturn(0); 1980 } 1981 1982 #undef __FUNCT__ 1983 #define __FUNCT__ "MatSolve_MPIAIJ" 1984 /* 1985 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1986 */ 1987 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1988 { 1989 namespace graph_dist = boost::graph::distributed; 1990 1991 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1992 lgraph_type* lgraph_p; 1993 PetscContainer c; 1994 PetscErrorCode ierr; 1995 1996 PetscFunctionBegin; 1997 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1998 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1999 ierr = VecCopy(b, x);CHKERRQ(ierr); 2000 2001 PetscScalar* array_x; 2002 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2003 PetscInt sx; 2004 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2005 2006 PetscScalar* array_b; 2007 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2008 PetscInt sb; 2009 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2010 2011 lgraph_type& level_graph = *lgraph_p; 2012 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2013 2014 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2015 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2016 ref_x(array_x, boost::extents[num_vertices(graph)]); 2017 2018 typedef boost::iterator_property_map<array_ref_type::iterator, 2019 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2020 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2021 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2022 2023 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2024 2025 PetscFunctionReturn(0); 2026 } 2027 #endif 2028 2029 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2030 PetscInt nzlocal,nsends,nrecvs; 2031 PetscMPIInt *send_rank; 2032 PetscInt *sbuf_nz,*sbuf_j,**rbuf_j; 2033 PetscScalar *sbuf_a,**rbuf_a; 2034 PetscErrorCode (*MatDestroy)(Mat); 2035 } Mat_Redundant; 2036 2037 #undef __FUNCT__ 2038 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2039 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2040 { 2041 PetscErrorCode ierr; 2042 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2043 PetscInt i; 2044 2045 PetscFunctionBegin; 2046 ierr = PetscFree(redund->send_rank);CHKERRQ(ierr); 2047 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2048 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2049 for (i=0; i<redund->nrecvs; i++){ 2050 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2051 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2052 } 2053 ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2054 ierr = PetscFree(redund);CHKERRQ(ierr); 2055 PetscFunctionReturn(0); 2056 } 2057 2058 #undef __FUNCT__ 2059 #define __FUNCT__ "MatDestroy_MatRedundant" 2060 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2061 { 2062 PetscErrorCode ierr; 2063 PetscContainer container; 2064 Mat_Redundant *redund=PETSC_NULL; 2065 2066 PetscFunctionBegin; 2067 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2068 if (container) { 2069 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2070 } else { 2071 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2072 } 2073 A->ops->destroy = redund->MatDestroy; 2074 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2075 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2076 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2077 PetscFunctionReturn(0); 2078 } 2079 2080 #undef __FUNCT__ 2081 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2082 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2083 { 2084 PetscMPIInt rank,size; 2085 MPI_Comm comm=((PetscObject)mat)->comm; 2086 PetscErrorCode ierr; 2087 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2088 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2089 PetscInt *rowrange=mat->rmap->range; 2090 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2091 Mat A=aij->A,B=aij->B,C=*matredundant; 2092 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2093 PetscScalar *sbuf_a; 2094 PetscInt nzlocal=a->nz+b->nz; 2095 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2096 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2097 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2098 MatScalar *aworkA,*aworkB; 2099 PetscScalar *vals; 2100 PetscMPIInt tag1,tag2,tag3,imdex; 2101 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2102 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2103 MPI_Status recv_status,*send_status; 2104 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2105 PetscInt **rbuf_j=PETSC_NULL; 2106 PetscScalar **rbuf_a=PETSC_NULL; 2107 Mat_Redundant *redund=PETSC_NULL; 2108 PetscContainer container; 2109 2110 PetscFunctionBegin; 2111 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2112 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2113 2114 if (reuse == MAT_REUSE_MATRIX) { 2115 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2116 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2117 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2118 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2119 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2120 if (container) { 2121 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2122 } else { 2123 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2124 } 2125 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2126 2127 nsends = redund->nsends; 2128 nrecvs = redund->nrecvs; 2129 send_rank = redund->send_rank; recv_rank = send_rank + size; 2130 sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends; 2131 sbuf_j = redund->sbuf_j; 2132 sbuf_a = redund->sbuf_a; 2133 rbuf_j = redund->rbuf_j; 2134 rbuf_a = redund->rbuf_a; 2135 } 2136 2137 if (reuse == MAT_INITIAL_MATRIX){ 2138 PetscMPIInt subrank,subsize; 2139 PetscInt nleftover,np_subcomm; 2140 /* get the destination processors' id send_rank, nsends and nrecvs */ 2141 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2142 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2143 ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); 2144 recv_rank = send_rank + size; 2145 np_subcomm = size/nsubcomm; 2146 nleftover = size - nsubcomm*np_subcomm; 2147 nsends = 0; nrecvs = 0; 2148 for (i=0; i<size; i++){ /* i=rank*/ 2149 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2150 send_rank[nsends] = i; nsends++; 2151 recv_rank[nrecvs++] = i; 2152 } 2153 } 2154 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2155 i = size-nleftover-1; 2156 j = 0; 2157 while (j < nsubcomm - nleftover){ 2158 send_rank[nsends++] = i; 2159 i--; j++; 2160 } 2161 } 2162 2163 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2164 for (i=0; i<nleftover; i++){ 2165 recv_rank[nrecvs++] = size-nleftover+i; 2166 } 2167 } 2168 2169 /* allocate sbuf_j, sbuf_a */ 2170 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2171 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2172 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2173 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2174 2175 /* copy mat's local entries into the buffers */ 2176 if (reuse == MAT_INITIAL_MATRIX){ 2177 rownz_max = 0; 2178 rptr = sbuf_j; 2179 cols = sbuf_j + rend-rstart + 1; 2180 vals = sbuf_a; 2181 rptr[0] = 0; 2182 for (i=0; i<rend-rstart; i++){ 2183 row = i + rstart; 2184 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2185 ncols = nzA + nzB; 2186 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2187 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2188 /* load the column indices for this row into cols */ 2189 lwrite = 0; 2190 for (l=0; l<nzB; l++) { 2191 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2192 vals[lwrite] = aworkB[l]; 2193 cols[lwrite++] = ctmp; 2194 } 2195 } 2196 for (l=0; l<nzA; l++){ 2197 vals[lwrite] = aworkA[l]; 2198 cols[lwrite++] = cstart + cworkA[l]; 2199 } 2200 for (l=0; l<nzB; l++) { 2201 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2202 vals[lwrite] = aworkB[l]; 2203 cols[lwrite++] = ctmp; 2204 } 2205 } 2206 vals += ncols; 2207 cols += ncols; 2208 rptr[i+1] = rptr[i] + ncols; 2209 if (rownz_max < ncols) rownz_max = ncols; 2210 } 2211 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2212 } else { /* only copy matrix values into sbuf_a */ 2213 rptr = sbuf_j; 2214 vals = sbuf_a; 2215 rptr[0] = 0; 2216 for (i=0; i<rend-rstart; i++){ 2217 row = i + rstart; 2218 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2219 ncols = nzA + nzB; 2220 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2221 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2222 lwrite = 0; 2223 for (l=0; l<nzB; l++) { 2224 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2225 } 2226 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2227 for (l=0; l<nzB; l++) { 2228 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2229 } 2230 vals += ncols; 2231 rptr[i+1] = rptr[i] + ncols; 2232 } 2233 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2234 2235 /* send nzlocal to others, and recv other's nzlocal */ 2236 /*--------------------------------------------------*/ 2237 if (reuse == MAT_INITIAL_MATRIX){ 2238 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2239 s_waits2 = s_waits3 + nsends; 2240 s_waits1 = s_waits2 + nsends; 2241 r_waits1 = s_waits1 + nsends; 2242 r_waits2 = r_waits1 + nrecvs; 2243 r_waits3 = r_waits2 + nrecvs; 2244 } else { 2245 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2246 r_waits3 = s_waits3 + nsends; 2247 } 2248 2249 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2250 if (reuse == MAT_INITIAL_MATRIX){ 2251 /* get new tags to keep the communication clean */ 2252 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2253 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2254 ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2255 rbuf_nz = sbuf_nz + nsends; 2256 2257 /* post receives of other's nzlocal */ 2258 for (i=0; i<nrecvs; i++){ 2259 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2260 } 2261 /* send nzlocal to others */ 2262 for (i=0; i<nsends; i++){ 2263 sbuf_nz[i] = nzlocal; 2264 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2265 } 2266 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2267 count = nrecvs; 2268 while (count) { 2269 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2270 recv_rank[imdex] = recv_status.MPI_SOURCE; 2271 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2272 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2273 2274 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2275 rbuf_nz[imdex] += i + 2; 2276 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2277 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2278 count--; 2279 } 2280 /* wait on sends of nzlocal */ 2281 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2282 /* send mat->i,j to others, and recv from other's */ 2283 /*------------------------------------------------*/ 2284 for (i=0; i<nsends; i++){ 2285 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2286 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2287 } 2288 /* wait on receives of mat->i,j */ 2289 /*------------------------------*/ 2290 count = nrecvs; 2291 while (count) { 2292 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2293 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2294 count--; 2295 } 2296 /* wait on sends of mat->i,j */ 2297 /*---------------------------*/ 2298 if (nsends) { 2299 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2300 } 2301 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2302 2303 /* post receives, send and receive mat->a */ 2304 /*----------------------------------------*/ 2305 for (imdex=0; imdex<nrecvs; imdex++) { 2306 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2307 } 2308 for (i=0; i<nsends; i++){ 2309 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2310 } 2311 count = nrecvs; 2312 while (count) { 2313 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2314 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2315 count--; 2316 } 2317 if (nsends) { 2318 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2319 } 2320 2321 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2322 2323 /* create redundant matrix */ 2324 /*-------------------------*/ 2325 if (reuse == MAT_INITIAL_MATRIX){ 2326 /* compute rownz_max for preallocation */ 2327 for (imdex=0; imdex<nrecvs; imdex++){ 2328 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2329 rptr = rbuf_j[imdex]; 2330 for (i=0; i<j; i++){ 2331 ncols = rptr[i+1] - rptr[i]; 2332 if (rownz_max < ncols) rownz_max = ncols; 2333 } 2334 } 2335 2336 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2337 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2338 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2339 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2340 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2341 } else { 2342 C = *matredundant; 2343 } 2344 2345 /* insert local matrix entries */ 2346 rptr = sbuf_j; 2347 cols = sbuf_j + rend-rstart + 1; 2348 vals = sbuf_a; 2349 for (i=0; i<rend-rstart; i++){ 2350 row = i + rstart; 2351 ncols = rptr[i+1] - rptr[i]; 2352 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2353 vals += ncols; 2354 cols += ncols; 2355 } 2356 /* insert received matrix entries */ 2357 for (imdex=0; imdex<nrecvs; imdex++){ 2358 rstart = rowrange[recv_rank[imdex]]; 2359 rend = rowrange[recv_rank[imdex]+1]; 2360 rptr = rbuf_j[imdex]; 2361 cols = rbuf_j[imdex] + rend-rstart + 1; 2362 vals = rbuf_a[imdex]; 2363 for (i=0; i<rend-rstart; i++){ 2364 row = i + rstart; 2365 ncols = rptr[i+1] - rptr[i]; 2366 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2367 vals += ncols; 2368 cols += ncols; 2369 } 2370 } 2371 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2372 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2373 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2374 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2375 if (reuse == MAT_INITIAL_MATRIX){ 2376 PetscContainer container; 2377 *matredundant = C; 2378 /* create a supporting struct and attach it to C for reuse */ 2379 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2380 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2381 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2382 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2383 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2384 2385 redund->nzlocal = nzlocal; 2386 redund->nsends = nsends; 2387 redund->nrecvs = nrecvs; 2388 redund->send_rank = send_rank; 2389 redund->sbuf_nz = sbuf_nz; 2390 redund->sbuf_j = sbuf_j; 2391 redund->sbuf_a = sbuf_a; 2392 redund->rbuf_j = rbuf_j; 2393 redund->rbuf_a = rbuf_a; 2394 2395 redund->MatDestroy = C->ops->destroy; 2396 C->ops->destroy = MatDestroy_MatRedundant; 2397 } 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2404 { 2405 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2406 PetscErrorCode ierr; 2407 PetscInt i,*idxb = 0; 2408 PetscScalar *va,*vb; 2409 Vec vtmp; 2410 2411 PetscFunctionBegin; 2412 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2413 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2414 if (idx) { 2415 for (i=0; i<A->rmap->n; i++) { 2416 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2417 } 2418 } 2419 2420 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2421 if (idx) { 2422 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2423 } 2424 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2425 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2426 2427 for (i=0; i<A->rmap->n; i++){ 2428 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2429 va[i] = vb[i]; 2430 if (idx) idx[i] = a->garray[idxb[i]]; 2431 } 2432 } 2433 2434 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2435 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2436 if (idxb) { 2437 ierr = PetscFree(idxb);CHKERRQ(ierr); 2438 } 2439 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 #undef __FUNCT__ 2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2446 { 2447 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2448 PetscErrorCode ierr; 2449 PetscInt i,*idxb = 0; 2450 PetscScalar *va,*vb; 2451 Vec vtmp; 2452 2453 PetscFunctionBegin; 2454 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2455 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2456 if (idx) { 2457 for (i=0; i<A->cmap->n; i++) { 2458 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2459 } 2460 } 2461 2462 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2463 if (idx) { 2464 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2465 } 2466 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2467 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2468 2469 for (i=0; i<A->rmap->n; i++){ 2470 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2471 va[i] = vb[i]; 2472 if (idx) idx[i] = a->garray[idxb[i]]; 2473 } 2474 } 2475 2476 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2477 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2478 if (idxb) { 2479 ierr = PetscFree(idxb);CHKERRQ(ierr); 2480 } 2481 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2488 { 2489 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2490 PetscInt n = A->rmap->n; 2491 PetscInt cstart = A->cmap->rstart; 2492 PetscInt *cmap = mat->garray; 2493 PetscInt *diagIdx, *offdiagIdx; 2494 Vec diagV, offdiagV; 2495 PetscScalar *a, *diagA, *offdiagA; 2496 PetscInt r; 2497 PetscErrorCode ierr; 2498 2499 PetscFunctionBegin; 2500 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2501 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2502 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2503 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2504 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2505 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2506 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2507 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2508 for(r = 0; r < n; ++r) { 2509 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2510 a[r] = diagA[r]; 2511 idx[r] = cstart + diagIdx[r]; 2512 } else { 2513 a[r] = offdiagA[r]; 2514 idx[r] = cmap[offdiagIdx[r]]; 2515 } 2516 } 2517 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2518 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2519 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2520 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2521 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2522 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2529 { 2530 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2531 PetscInt n = A->rmap->n; 2532 PetscInt cstart = A->cmap->rstart; 2533 PetscInt *cmap = mat->garray; 2534 PetscInt *diagIdx, *offdiagIdx; 2535 Vec diagV, offdiagV; 2536 PetscScalar *a, *diagA, *offdiagA; 2537 PetscInt r; 2538 PetscErrorCode ierr; 2539 2540 PetscFunctionBegin; 2541 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2542 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2543 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2544 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2545 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2546 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2547 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2548 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2549 for(r = 0; r < n; ++r) { 2550 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2551 a[r] = diagA[r]; 2552 idx[r] = cstart + diagIdx[r]; 2553 } else { 2554 a[r] = offdiagA[r]; 2555 idx[r] = cmap[offdiagIdx[r]]; 2556 } 2557 } 2558 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2559 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2560 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2561 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2562 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2563 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2564 PetscFunctionReturn(0); 2565 } 2566 2567 #undef __FUNCT__ 2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2570 { 2571 PetscErrorCode ierr; 2572 Mat *dummy; 2573 2574 PetscFunctionBegin; 2575 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2576 *newmat = *dummy; 2577 ierr = PetscFree(dummy);CHKERRQ(ierr); 2578 PetscFunctionReturn(0); 2579 } 2580 2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2582 /* -------------------------------------------------------------------*/ 2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2584 MatGetRow_MPIAIJ, 2585 MatRestoreRow_MPIAIJ, 2586 MatMult_MPIAIJ, 2587 /* 4*/ MatMultAdd_MPIAIJ, 2588 MatMultTranspose_MPIAIJ, 2589 MatMultTransposeAdd_MPIAIJ, 2590 #ifdef PETSC_HAVE_PBGL 2591 MatSolve_MPIAIJ, 2592 #else 2593 0, 2594 #endif 2595 0, 2596 0, 2597 /*10*/ 0, 2598 0, 2599 0, 2600 MatSOR_MPIAIJ, 2601 MatTranspose_MPIAIJ, 2602 /*15*/ MatGetInfo_MPIAIJ, 2603 MatEqual_MPIAIJ, 2604 MatGetDiagonal_MPIAIJ, 2605 MatDiagonalScale_MPIAIJ, 2606 MatNorm_MPIAIJ, 2607 /*20*/ MatAssemblyBegin_MPIAIJ, 2608 MatAssemblyEnd_MPIAIJ, 2609 MatSetOption_MPIAIJ, 2610 MatZeroEntries_MPIAIJ, 2611 /*24*/ MatZeroRows_MPIAIJ, 2612 0, 2613 #ifdef PETSC_HAVE_PBGL 2614 0, 2615 #else 2616 0, 2617 #endif 2618 0, 2619 0, 2620 /*29*/ MatSetUpPreallocation_MPIAIJ, 2621 #ifdef PETSC_HAVE_PBGL 2622 0, 2623 #else 2624 0, 2625 #endif 2626 0, 2627 0, 2628 0, 2629 /*34*/ MatDuplicate_MPIAIJ, 2630 0, 2631 0, 2632 0, 2633 0, 2634 /*39*/ MatAXPY_MPIAIJ, 2635 MatGetSubMatrices_MPIAIJ, 2636 MatIncreaseOverlap_MPIAIJ, 2637 MatGetValues_MPIAIJ, 2638 MatCopy_MPIAIJ, 2639 /*44*/ MatGetRowMax_MPIAIJ, 2640 MatScale_MPIAIJ, 2641 0, 2642 0, 2643 0, 2644 /*49*/ MatSetBlockSize_MPIAIJ, 2645 0, 2646 0, 2647 0, 2648 0, 2649 /*54*/ MatFDColoringCreate_MPIAIJ, 2650 0, 2651 MatSetUnfactored_MPIAIJ, 2652 MatPermute_MPIAIJ, 2653 0, 2654 /*59*/ MatGetSubMatrix_MPIAIJ, 2655 MatDestroy_MPIAIJ, 2656 MatView_MPIAIJ, 2657 0, 2658 0, 2659 /*64*/ 0, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2665 MatGetRowMinAbs_MPIAIJ, 2666 0, 2667 MatSetColoring_MPIAIJ, 2668 #if defined(PETSC_HAVE_ADIC) 2669 MatSetValuesAdic_MPIAIJ, 2670 #else 2671 0, 2672 #endif 2673 MatSetValuesAdifor_MPIAIJ, 2674 /*75*/ MatFDColoringApply_AIJ, 2675 0, 2676 0, 2677 0, 2678 0, 2679 /*80*/ 0, 2680 0, 2681 0, 2682 /*83*/ MatLoad_MPIAIJ, 2683 0, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2689 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2690 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2691 MatPtAP_Basic, 2692 MatPtAPSymbolic_MPIAIJ, 2693 /*94*/ MatPtAPNumeric_MPIAIJ, 2694 0, 2695 0, 2696 0, 2697 0, 2698 /*99*/ 0, 2699 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2700 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2701 MatConjugate_MPIAIJ, 2702 0, 2703 /*104*/MatSetValuesRow_MPIAIJ, 2704 MatRealPart_MPIAIJ, 2705 MatImaginaryPart_MPIAIJ, 2706 0, 2707 0, 2708 /*109*/0, 2709 MatGetRedundantMatrix_MPIAIJ, 2710 MatGetRowMin_MPIAIJ, 2711 0, 2712 0, 2713 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2714 0, 2715 0, 2716 0, 2717 0, 2718 0 2719 }; 2720 2721 /* ----------------------------------------------------------------------------------------*/ 2722 2723 EXTERN_C_BEGIN 2724 #undef __FUNCT__ 2725 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2726 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2727 { 2728 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2729 PetscErrorCode ierr; 2730 2731 PetscFunctionBegin; 2732 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2733 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2734 PetscFunctionReturn(0); 2735 } 2736 EXTERN_C_END 2737 2738 EXTERN_C_BEGIN 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2741 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2742 { 2743 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2744 PetscErrorCode ierr; 2745 2746 PetscFunctionBegin; 2747 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2748 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2749 PetscFunctionReturn(0); 2750 } 2751 EXTERN_C_END 2752 2753 #include "petscpc.h" 2754 EXTERN_C_BEGIN 2755 #undef __FUNCT__ 2756 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2757 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2758 { 2759 Mat_MPIAIJ *b; 2760 PetscErrorCode ierr; 2761 PetscInt i; 2762 2763 PetscFunctionBegin; 2764 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2765 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2766 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2767 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2768 2769 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2770 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2771 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2772 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2773 if (d_nnz) { 2774 for (i=0; i<B->rmap->n; i++) { 2775 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2776 } 2777 } 2778 if (o_nnz) { 2779 for (i=0; i<B->rmap->n; i++) { 2780 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2781 } 2782 } 2783 b = (Mat_MPIAIJ*)B->data; 2784 2785 if (!B->preallocated) { 2786 /* Explicitly create 2 MATSEQAIJ matrices. */ 2787 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2788 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2789 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2790 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2791 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2792 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2793 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2794 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2795 } 2796 2797 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2798 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2799 B->preallocated = PETSC_TRUE; 2800 PetscFunctionReturn(0); 2801 } 2802 EXTERN_C_END 2803 2804 #undef __FUNCT__ 2805 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2806 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2807 { 2808 Mat mat; 2809 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2810 PetscErrorCode ierr; 2811 2812 PetscFunctionBegin; 2813 *newmat = 0; 2814 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2815 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2816 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2817 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2818 a = (Mat_MPIAIJ*)mat->data; 2819 2820 mat->factor = matin->factor; 2821 mat->rmap->bs = matin->rmap->bs; 2822 mat->assembled = PETSC_TRUE; 2823 mat->insertmode = NOT_SET_VALUES; 2824 mat->preallocated = PETSC_TRUE; 2825 2826 a->size = oldmat->size; 2827 a->rank = oldmat->rank; 2828 a->donotstash = oldmat->donotstash; 2829 a->roworiented = oldmat->roworiented; 2830 a->rowindices = 0; 2831 a->rowvalues = 0; 2832 a->getrowactive = PETSC_FALSE; 2833 2834 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2835 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2836 2837 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2838 if (oldmat->colmap) { 2839 #if defined (PETSC_USE_CTABLE) 2840 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2841 #else 2842 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2843 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2844 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2845 #endif 2846 } else a->colmap = 0; 2847 if (oldmat->garray) { 2848 PetscInt len; 2849 len = oldmat->B->cmap->n; 2850 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2851 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2852 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2853 } else a->garray = 0; 2854 2855 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2856 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2857 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2858 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2859 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2860 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2861 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2862 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2863 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2864 *newmat = mat; 2865 PetscFunctionReturn(0); 2866 } 2867 2868 #include "petscsys.h" 2869 2870 #undef __FUNCT__ 2871 #define __FUNCT__ "MatLoad_MPIAIJ" 2872 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2873 { 2874 Mat A; 2875 PetscScalar *vals,*svals; 2876 MPI_Comm comm = ((PetscObject)viewer)->comm; 2877 MPI_Status status; 2878 PetscErrorCode ierr; 2879 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2880 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2881 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2882 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2883 PetscInt cend,cstart,n,*rowners; 2884 int fd; 2885 2886 PetscFunctionBegin; 2887 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2888 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2889 if (!rank) { 2890 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2891 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2892 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2893 } 2894 2895 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2896 M = header[1]; N = header[2]; 2897 /* determine ownership of all rows */ 2898 m = M/size + ((M % size) > rank); 2899 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2900 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2901 2902 /* First process needs enough room for process with most rows */ 2903 if (!rank) { 2904 mmax = rowners[1]; 2905 for (i=2; i<size; i++) { 2906 mmax = PetscMax(mmax,rowners[i]); 2907 } 2908 } else mmax = m; 2909 2910 rowners[0] = 0; 2911 for (i=2; i<=size; i++) { 2912 rowners[i] += rowners[i-1]; 2913 } 2914 rstart = rowners[rank]; 2915 rend = rowners[rank+1]; 2916 2917 /* distribute row lengths to all processors */ 2918 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2919 if (!rank) { 2920 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2921 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2922 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2923 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2924 for (j=0; j<m; j++) { 2925 procsnz[0] += ourlens[j]; 2926 } 2927 for (i=1; i<size; i++) { 2928 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2929 /* calculate the number of nonzeros on each processor */ 2930 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2931 procsnz[i] += rowlengths[j]; 2932 } 2933 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2934 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2935 } 2936 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2937 } else { 2938 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2939 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2940 } 2941 2942 if (!rank) { 2943 /* determine max buffer needed and allocate it */ 2944 maxnz = 0; 2945 for (i=0; i<size; i++) { 2946 maxnz = PetscMax(maxnz,procsnz[i]); 2947 } 2948 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2949 2950 /* read in my part of the matrix column indices */ 2951 nz = procsnz[0]; 2952 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2953 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2954 2955 /* read in every one elses and ship off */ 2956 for (i=1; i<size; i++) { 2957 nz = procsnz[i]; 2958 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2959 mpicnt = PetscMPIIntCast(nz); 2960 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2961 } 2962 ierr = PetscFree(cols);CHKERRQ(ierr); 2963 } else { 2964 /* determine buffer space needed for message */ 2965 nz = 0; 2966 for (i=0; i<m; i++) { 2967 nz += ourlens[i]; 2968 } 2969 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2970 2971 /* receive message of column indices*/ 2972 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2973 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2974 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2975 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2976 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2977 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2978 } 2979 2980 /* determine column ownership if matrix is not square */ 2981 if (N != M) { 2982 n = N/size + ((N % size) > rank); 2983 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2984 cstart = cend - n; 2985 } else { 2986 cstart = rstart; 2987 cend = rend; 2988 n = cend - cstart; 2989 } 2990 2991 /* loop over local rows, determining number of off diagonal entries */ 2992 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2993 jj = 0; 2994 for (i=0; i<m; i++) { 2995 for (j=0; j<ourlens[i]; j++) { 2996 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2997 jj++; 2998 } 2999 } 3000 3001 /* create our matrix */ 3002 for (i=0; i<m; i++) { 3003 ourlens[i] -= offlens[i]; 3004 } 3005 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 3006 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 3007 ierr = MatSetType(A,type);CHKERRQ(ierr); 3008 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 3009 3010 for (i=0; i<m; i++) { 3011 ourlens[i] += offlens[i]; 3012 } 3013 3014 if (!rank) { 3015 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3016 3017 /* read in my part of the matrix numerical values */ 3018 nz = procsnz[0]; 3019 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3020 3021 /* insert into matrix */ 3022 jj = rstart; 3023 smycols = mycols; 3024 svals = vals; 3025 for (i=0; i<m; i++) { 3026 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3027 smycols += ourlens[i]; 3028 svals += ourlens[i]; 3029 jj++; 3030 } 3031 3032 /* read in other processors and ship out */ 3033 for (i=1; i<size; i++) { 3034 nz = procsnz[i]; 3035 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3036 mpicnt = PetscMPIIntCast(nz); 3037 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3038 } 3039 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3040 } else { 3041 /* receive numeric values */ 3042 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3043 3044 /* receive message of values*/ 3045 mpicnt = PetscMPIIntCast(nz); 3046 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3047 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3048 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 3049 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 3050 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3051 3052 /* insert into matrix */ 3053 jj = rstart; 3054 smycols = mycols; 3055 svals = vals; 3056 for (i=0; i<m; i++) { 3057 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3058 smycols += ourlens[i]; 3059 svals += ourlens[i]; 3060 jj++; 3061 } 3062 } 3063 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3064 ierr = PetscFree(vals);CHKERRQ(ierr); 3065 ierr = PetscFree(mycols);CHKERRQ(ierr); 3066 ierr = PetscFree(rowners);CHKERRQ(ierr); 3067 3068 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3069 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3070 *newmat = A; 3071 PetscFunctionReturn(0); 3072 } 3073 3074 #undef __FUNCT__ 3075 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3076 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3077 { 3078 PetscErrorCode ierr; 3079 IS iscol_local; 3080 PetscInt csize; 3081 3082 PetscFunctionBegin; 3083 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3084 if (call == MAT_REUSE_MATRIX) { 3085 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3086 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3087 } else { 3088 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3089 } 3090 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3091 if (call == MAT_INITIAL_MATRIX) { 3092 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3093 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3094 } 3095 PetscFunctionReturn(0); 3096 } 3097 3098 #undef __FUNCT__ 3099 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3100 /* 3101 Not great since it makes two copies of the submatrix, first an SeqAIJ 3102 in local and then by concatenating the local matrices the end result. 3103 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3104 3105 Note: This requires a sequential iscol with all indices. 3106 */ 3107 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3108 { 3109 PetscErrorCode ierr; 3110 PetscMPIInt rank,size; 3111 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3112 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3113 Mat *local,M,Mreuse; 3114 MatScalar *vwork,*aa; 3115 MPI_Comm comm = ((PetscObject)mat)->comm; 3116 Mat_SeqAIJ *aij; 3117 3118 3119 PetscFunctionBegin; 3120 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3121 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3122 3123 if (call == MAT_REUSE_MATRIX) { 3124 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3125 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3126 local = &Mreuse; 3127 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3128 } else { 3129 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3130 Mreuse = *local; 3131 ierr = PetscFree(local);CHKERRQ(ierr); 3132 } 3133 3134 /* 3135 m - number of local rows 3136 n - number of columns (same on all processors) 3137 rstart - first row in new global matrix generated 3138 */ 3139 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3140 if (call == MAT_INITIAL_MATRIX) { 3141 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3142 ii = aij->i; 3143 jj = aij->j; 3144 3145 /* 3146 Determine the number of non-zeros in the diagonal and off-diagonal 3147 portions of the matrix in order to do correct preallocation 3148 */ 3149 3150 /* first get start and end of "diagonal" columns */ 3151 if (csize == PETSC_DECIDE) { 3152 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3153 if (mglobal == n) { /* square matrix */ 3154 nlocal = m; 3155 } else { 3156 nlocal = n/size + ((n % size) > rank); 3157 } 3158 } else { 3159 nlocal = csize; 3160 } 3161 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3162 rstart = rend - nlocal; 3163 if (rank == size - 1 && rend != n) { 3164 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3165 } 3166 3167 /* next, compute all the lengths */ 3168 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3169 olens = dlens + m; 3170 for (i=0; i<m; i++) { 3171 jend = ii[i+1] - ii[i]; 3172 olen = 0; 3173 dlen = 0; 3174 for (j=0; j<jend; j++) { 3175 if (*jj < rstart || *jj >= rend) olen++; 3176 else dlen++; 3177 jj++; 3178 } 3179 olens[i] = olen; 3180 dlens[i] = dlen; 3181 } 3182 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3183 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3184 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3185 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3186 ierr = PetscFree(dlens);CHKERRQ(ierr); 3187 } else { 3188 PetscInt ml,nl; 3189 3190 M = *newmat; 3191 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3192 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3193 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3194 /* 3195 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3196 rather than the slower MatSetValues(). 3197 */ 3198 M->was_assembled = PETSC_TRUE; 3199 M->assembled = PETSC_FALSE; 3200 } 3201 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3202 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3203 ii = aij->i; 3204 jj = aij->j; 3205 aa = aij->a; 3206 for (i=0; i<m; i++) { 3207 row = rstart + i; 3208 nz = ii[i+1] - ii[i]; 3209 cwork = jj; jj += nz; 3210 vwork = aa; aa += nz; 3211 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3212 } 3213 3214 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3215 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3216 *newmat = M; 3217 3218 /* save submatrix used in processor for next request */ 3219 if (call == MAT_INITIAL_MATRIX) { 3220 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3221 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3222 } 3223 3224 PetscFunctionReturn(0); 3225 } 3226 3227 EXTERN_C_BEGIN 3228 #undef __FUNCT__ 3229 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3230 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3231 { 3232 PetscInt m,cstart, cend,j,nnz,i,d; 3233 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3234 const PetscInt *JJ; 3235 PetscScalar *values; 3236 PetscErrorCode ierr; 3237 3238 PetscFunctionBegin; 3239 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3240 3241 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3242 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3243 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3244 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3245 m = B->rmap->n; 3246 cstart = B->cmap->rstart; 3247 cend = B->cmap->rend; 3248 rstart = B->rmap->rstart; 3249 3250 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 3251 o_nnz = d_nnz + m; 3252 3253 #if defined(PETSC_USE_DEBUGGING) 3254 for (i=0; i<m; i++) { 3255 nnz = Ii[i+1]- Ii[i]; 3256 JJ = J + Ii[i]; 3257 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3258 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3259 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3260 } 3261 #endif 3262 3263 for (i=0; i<m; i++) { 3264 nnz = Ii[i+1]- Ii[i]; 3265 JJ = J + Ii[i]; 3266 nnz_max = PetscMax(nnz_max,nnz); 3267 d = 0; 3268 for (j=0; j<nnz; j++) { 3269 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3270 } 3271 d_nnz[i] = d; 3272 o_nnz[i] = nnz - d; 3273 } 3274 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3275 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 3276 3277 if (v) values = (PetscScalar*)v; 3278 else { 3279 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3280 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3281 } 3282 3283 for (i=0; i<m; i++) { 3284 ii = i + rstart; 3285 nnz = Ii[i+1]- Ii[i]; 3286 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3287 } 3288 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3289 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3290 3291 if (!v) { 3292 ierr = PetscFree(values);CHKERRQ(ierr); 3293 } 3294 PetscFunctionReturn(0); 3295 } 3296 EXTERN_C_END 3297 3298 #undef __FUNCT__ 3299 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3300 /*@ 3301 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3302 (the default parallel PETSc format). 3303 3304 Collective on MPI_Comm 3305 3306 Input Parameters: 3307 + B - the matrix 3308 . i - the indices into j for the start of each local row (starts with zero) 3309 . j - the column indices for each local row (starts with zero) 3310 - v - optional values in the matrix 3311 3312 Level: developer 3313 3314 Notes: 3315 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3316 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3317 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3318 3319 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3320 3321 The format which is used for the sparse matrix input, is equivalent to a 3322 row-major ordering.. i.e for the following matrix, the input data expected is 3323 as shown: 3324 3325 1 0 0 3326 2 0 3 P0 3327 ------- 3328 4 5 6 P1 3329 3330 Process0 [P0]: rows_owned=[0,1] 3331 i = {0,1,3} [size = nrow+1 = 2+1] 3332 j = {0,0,2} [size = nz = 6] 3333 v = {1,2,3} [size = nz = 6] 3334 3335 Process1 [P1]: rows_owned=[2] 3336 i = {0,3} [size = nrow+1 = 1+1] 3337 j = {0,1,2} [size = nz = 6] 3338 v = {4,5,6} [size = nz = 6] 3339 3340 .keywords: matrix, aij, compressed row, sparse, parallel 3341 3342 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3343 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3344 @*/ 3345 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3346 { 3347 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3348 3349 PetscFunctionBegin; 3350 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3351 if (f) { 3352 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3353 } 3354 PetscFunctionReturn(0); 3355 } 3356 3357 #undef __FUNCT__ 3358 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3359 /*@C 3360 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3361 (the default parallel PETSc format). For good matrix assembly performance 3362 the user should preallocate the matrix storage by setting the parameters 3363 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3364 performance can be increased by more than a factor of 50. 3365 3366 Collective on MPI_Comm 3367 3368 Input Parameters: 3369 + A - the matrix 3370 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3371 (same value is used for all local rows) 3372 . d_nnz - array containing the number of nonzeros in the various rows of the 3373 DIAGONAL portion of the local submatrix (possibly different for each row) 3374 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3375 The size of this array is equal to the number of local rows, i.e 'm'. 3376 You must leave room for the diagonal entry even if it is zero. 3377 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3378 submatrix (same value is used for all local rows). 3379 - o_nnz - array containing the number of nonzeros in the various rows of the 3380 OFF-DIAGONAL portion of the local submatrix (possibly different for 3381 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3382 structure. The size of this array is equal to the number 3383 of local rows, i.e 'm'. 3384 3385 If the *_nnz parameter is given then the *_nz parameter is ignored 3386 3387 The AIJ format (also called the Yale sparse matrix format or 3388 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3389 storage. The stored row and column indices begin with zero. See the users manual for details. 3390 3391 The parallel matrix is partitioned such that the first m0 rows belong to 3392 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3393 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3394 3395 The DIAGONAL portion of the local submatrix of a processor can be defined 3396 as the submatrix which is obtained by extraction the part corresponding 3397 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3398 first row that belongs to the processor, and r2 is the last row belonging 3399 to the this processor. This is a square mxm matrix. The remaining portion 3400 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3401 3402 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3403 3404 You can call MatGetInfo() to get information on how effective the preallocation was; 3405 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3406 You can also run with the option -info and look for messages with the string 3407 malloc in them to see if additional memory allocation was needed. 3408 3409 Example usage: 3410 3411 Consider the following 8x8 matrix with 34 non-zero values, that is 3412 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3413 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3414 as follows: 3415 3416 .vb 3417 1 2 0 | 0 3 0 | 0 4 3418 Proc0 0 5 6 | 7 0 0 | 8 0 3419 9 0 10 | 11 0 0 | 12 0 3420 ------------------------------------- 3421 13 0 14 | 15 16 17 | 0 0 3422 Proc1 0 18 0 | 19 20 21 | 0 0 3423 0 0 0 | 22 23 0 | 24 0 3424 ------------------------------------- 3425 Proc2 25 26 27 | 0 0 28 | 29 0 3426 30 0 0 | 31 32 33 | 0 34 3427 .ve 3428 3429 This can be represented as a collection of submatrices as: 3430 3431 .vb 3432 A B C 3433 D E F 3434 G H I 3435 .ve 3436 3437 Where the submatrices A,B,C are owned by proc0, D,E,F are 3438 owned by proc1, G,H,I are owned by proc2. 3439 3440 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3441 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3442 The 'M','N' parameters are 8,8, and have the same values on all procs. 3443 3444 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3445 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3446 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3447 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3448 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3449 matrix, ans [DF] as another SeqAIJ matrix. 3450 3451 When d_nz, o_nz parameters are specified, d_nz storage elements are 3452 allocated for every row of the local diagonal submatrix, and o_nz 3453 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3454 One way to choose d_nz and o_nz is to use the max nonzerors per local 3455 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3456 In this case, the values of d_nz,o_nz are: 3457 .vb 3458 proc0 : dnz = 2, o_nz = 2 3459 proc1 : dnz = 3, o_nz = 2 3460 proc2 : dnz = 1, o_nz = 4 3461 .ve 3462 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3463 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3464 for proc3. i.e we are using 12+15+10=37 storage locations to store 3465 34 values. 3466 3467 When d_nnz, o_nnz parameters are specified, the storage is specified 3468 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3469 In the above case the values for d_nnz,o_nnz are: 3470 .vb 3471 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3472 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3473 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3474 .ve 3475 Here the space allocated is sum of all the above values i.e 34, and 3476 hence pre-allocation is perfect. 3477 3478 Level: intermediate 3479 3480 .keywords: matrix, aij, compressed row, sparse, parallel 3481 3482 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3483 MPIAIJ, MatGetInfo() 3484 @*/ 3485 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3486 { 3487 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3488 3489 PetscFunctionBegin; 3490 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3491 if (f) { 3492 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3493 } 3494 PetscFunctionReturn(0); 3495 } 3496 3497 #undef __FUNCT__ 3498 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3499 /*@ 3500 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3501 CSR format the local rows. 3502 3503 Collective on MPI_Comm 3504 3505 Input Parameters: 3506 + comm - MPI communicator 3507 . m - number of local rows (Cannot be PETSC_DECIDE) 3508 . n - This value should be the same as the local size used in creating the 3509 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3510 calculated if N is given) For square matrices n is almost always m. 3511 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3512 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3513 . i - row indices 3514 . j - column indices 3515 - a - matrix values 3516 3517 Output Parameter: 3518 . mat - the matrix 3519 3520 Level: intermediate 3521 3522 Notes: 3523 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3524 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3525 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3526 3527 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3528 3529 The format which is used for the sparse matrix input, is equivalent to a 3530 row-major ordering.. i.e for the following matrix, the input data expected is 3531 as shown: 3532 3533 1 0 0 3534 2 0 3 P0 3535 ------- 3536 4 5 6 P1 3537 3538 Process0 [P0]: rows_owned=[0,1] 3539 i = {0,1,3} [size = nrow+1 = 2+1] 3540 j = {0,0,2} [size = nz = 6] 3541 v = {1,2,3} [size = nz = 6] 3542 3543 Process1 [P1]: rows_owned=[2] 3544 i = {0,3} [size = nrow+1 = 1+1] 3545 j = {0,1,2} [size = nz = 6] 3546 v = {4,5,6} [size = nz = 6] 3547 3548 .keywords: matrix, aij, compressed row, sparse, parallel 3549 3550 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3551 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3552 @*/ 3553 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3554 { 3555 PetscErrorCode ierr; 3556 3557 PetscFunctionBegin; 3558 if (i[0]) { 3559 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3560 } 3561 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3562 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3563 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3564 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3565 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3566 PetscFunctionReturn(0); 3567 } 3568 3569 #undef __FUNCT__ 3570 #define __FUNCT__ "MatCreateMPIAIJ" 3571 /*@C 3572 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3573 (the default parallel PETSc format). For good matrix assembly performance 3574 the user should preallocate the matrix storage by setting the parameters 3575 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3576 performance can be increased by more than a factor of 50. 3577 3578 Collective on MPI_Comm 3579 3580 Input Parameters: 3581 + comm - MPI communicator 3582 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3583 This value should be the same as the local size used in creating the 3584 y vector for the matrix-vector product y = Ax. 3585 . n - This value should be the same as the local size used in creating the 3586 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3587 calculated if N is given) For square matrices n is almost always m. 3588 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3589 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3590 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3591 (same value is used for all local rows) 3592 . d_nnz - array containing the number of nonzeros in the various rows of the 3593 DIAGONAL portion of the local submatrix (possibly different for each row) 3594 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3595 The size of this array is equal to the number of local rows, i.e 'm'. 3596 You must leave room for the diagonal entry even if it is zero. 3597 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3598 submatrix (same value is used for all local rows). 3599 - o_nnz - array containing the number of nonzeros in the various rows of the 3600 OFF-DIAGONAL portion of the local submatrix (possibly different for 3601 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3602 structure. The size of this array is equal to the number 3603 of local rows, i.e 'm'. 3604 3605 Output Parameter: 3606 . A - the matrix 3607 3608 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3609 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3610 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3611 3612 Notes: 3613 If the *_nnz parameter is given then the *_nz parameter is ignored 3614 3615 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3616 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3617 storage requirements for this matrix. 3618 3619 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3620 processor than it must be used on all processors that share the object for 3621 that argument. 3622 3623 The user MUST specify either the local or global matrix dimensions 3624 (possibly both). 3625 3626 The parallel matrix is partitioned across processors such that the 3627 first m0 rows belong to process 0, the next m1 rows belong to 3628 process 1, the next m2 rows belong to process 2 etc.. where 3629 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3630 values corresponding to [m x N] submatrix. 3631 3632 The columns are logically partitioned with the n0 columns belonging 3633 to 0th partition, the next n1 columns belonging to the next 3634 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3635 3636 The DIAGONAL portion of the local submatrix on any given processor 3637 is the submatrix corresponding to the rows and columns m,n 3638 corresponding to the given processor. i.e diagonal matrix on 3639 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3640 etc. The remaining portion of the local submatrix [m x (N-n)] 3641 constitute the OFF-DIAGONAL portion. The example below better 3642 illustrates this concept. 3643 3644 For a square global matrix we define each processor's diagonal portion 3645 to be its local rows and the corresponding columns (a square submatrix); 3646 each processor's off-diagonal portion encompasses the remainder of the 3647 local matrix (a rectangular submatrix). 3648 3649 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3650 3651 When calling this routine with a single process communicator, a matrix of 3652 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3653 type of communicator, use the construction mechanism: 3654 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3655 3656 By default, this format uses inodes (identical nodes) when possible. 3657 We search for consecutive rows with the same nonzero structure, thereby 3658 reusing matrix information to achieve increased efficiency. 3659 3660 Options Database Keys: 3661 + -mat_no_inode - Do not use inodes 3662 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3663 - -mat_aij_oneindex - Internally use indexing starting at 1 3664 rather than 0. Note that when calling MatSetValues(), 3665 the user still MUST index entries starting at 0! 3666 3667 3668 Example usage: 3669 3670 Consider the following 8x8 matrix with 34 non-zero values, that is 3671 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3672 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3673 as follows: 3674 3675 .vb 3676 1 2 0 | 0 3 0 | 0 4 3677 Proc0 0 5 6 | 7 0 0 | 8 0 3678 9 0 10 | 11 0 0 | 12 0 3679 ------------------------------------- 3680 13 0 14 | 15 16 17 | 0 0 3681 Proc1 0 18 0 | 19 20 21 | 0 0 3682 0 0 0 | 22 23 0 | 24 0 3683 ------------------------------------- 3684 Proc2 25 26 27 | 0 0 28 | 29 0 3685 30 0 0 | 31 32 33 | 0 34 3686 .ve 3687 3688 This can be represented as a collection of submatrices as: 3689 3690 .vb 3691 A B C 3692 D E F 3693 G H I 3694 .ve 3695 3696 Where the submatrices A,B,C are owned by proc0, D,E,F are 3697 owned by proc1, G,H,I are owned by proc2. 3698 3699 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3700 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3701 The 'M','N' parameters are 8,8, and have the same values on all procs. 3702 3703 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3704 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3705 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3706 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3707 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3708 matrix, ans [DF] as another SeqAIJ matrix. 3709 3710 When d_nz, o_nz parameters are specified, d_nz storage elements are 3711 allocated for every row of the local diagonal submatrix, and o_nz 3712 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3713 One way to choose d_nz and o_nz is to use the max nonzerors per local 3714 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3715 In this case, the values of d_nz,o_nz are: 3716 .vb 3717 proc0 : dnz = 2, o_nz = 2 3718 proc1 : dnz = 3, o_nz = 2 3719 proc2 : dnz = 1, o_nz = 4 3720 .ve 3721 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3722 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3723 for proc3. i.e we are using 12+15+10=37 storage locations to store 3724 34 values. 3725 3726 When d_nnz, o_nnz parameters are specified, the storage is specified 3727 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3728 In the above case the values for d_nnz,o_nnz are: 3729 .vb 3730 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3731 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3732 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3733 .ve 3734 Here the space allocated is sum of all the above values i.e 34, and 3735 hence pre-allocation is perfect. 3736 3737 Level: intermediate 3738 3739 .keywords: matrix, aij, compressed row, sparse, parallel 3740 3741 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3742 MPIAIJ, MatCreateMPIAIJWithArrays() 3743 @*/ 3744 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3745 { 3746 PetscErrorCode ierr; 3747 PetscMPIInt size; 3748 3749 PetscFunctionBegin; 3750 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3751 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3752 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3753 if (size > 1) { 3754 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3755 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3756 } else { 3757 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3758 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3759 } 3760 PetscFunctionReturn(0); 3761 } 3762 3763 #undef __FUNCT__ 3764 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3765 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3766 { 3767 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3768 3769 PetscFunctionBegin; 3770 *Ad = a->A; 3771 *Ao = a->B; 3772 *colmap = a->garray; 3773 PetscFunctionReturn(0); 3774 } 3775 3776 #undef __FUNCT__ 3777 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3778 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3779 { 3780 PetscErrorCode ierr; 3781 PetscInt i; 3782 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3783 3784 PetscFunctionBegin; 3785 if (coloring->ctype == IS_COLORING_GLOBAL) { 3786 ISColoringValue *allcolors,*colors; 3787 ISColoring ocoloring; 3788 3789 /* set coloring for diagonal portion */ 3790 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3791 3792 /* set coloring for off-diagonal portion */ 3793 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3794 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3795 for (i=0; i<a->B->cmap->n; i++) { 3796 colors[i] = allcolors[a->garray[i]]; 3797 } 3798 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3799 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3800 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3801 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3802 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3803 ISColoringValue *colors; 3804 PetscInt *larray; 3805 ISColoring ocoloring; 3806 3807 /* set coloring for diagonal portion */ 3808 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3809 for (i=0; i<a->A->cmap->n; i++) { 3810 larray[i] = i + A->cmap->rstart; 3811 } 3812 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3813 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3814 for (i=0; i<a->A->cmap->n; i++) { 3815 colors[i] = coloring->colors[larray[i]]; 3816 } 3817 ierr = PetscFree(larray);CHKERRQ(ierr); 3818 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3819 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3820 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3821 3822 /* set coloring for off-diagonal portion */ 3823 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3824 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3825 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3826 for (i=0; i<a->B->cmap->n; i++) { 3827 colors[i] = coloring->colors[larray[i]]; 3828 } 3829 ierr = PetscFree(larray);CHKERRQ(ierr); 3830 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3831 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3832 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3833 } else { 3834 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3835 } 3836 3837 PetscFunctionReturn(0); 3838 } 3839 3840 #if defined(PETSC_HAVE_ADIC) 3841 #undef __FUNCT__ 3842 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3843 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3844 { 3845 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3846 PetscErrorCode ierr; 3847 3848 PetscFunctionBegin; 3849 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3850 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3851 PetscFunctionReturn(0); 3852 } 3853 #endif 3854 3855 #undef __FUNCT__ 3856 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3857 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3858 { 3859 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3860 PetscErrorCode ierr; 3861 3862 PetscFunctionBegin; 3863 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3864 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3865 PetscFunctionReturn(0); 3866 } 3867 3868 #undef __FUNCT__ 3869 #define __FUNCT__ "MatMerge" 3870 /*@ 3871 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3872 matrices from each processor 3873 3874 Collective on MPI_Comm 3875 3876 Input Parameters: 3877 + comm - the communicators the parallel matrix will live on 3878 . inmat - the input sequential matrices 3879 . n - number of local columns (or PETSC_DECIDE) 3880 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3881 3882 Output Parameter: 3883 . outmat - the parallel matrix generated 3884 3885 Level: advanced 3886 3887 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3888 3889 @*/ 3890 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3891 { 3892 PetscErrorCode ierr; 3893 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3894 PetscInt *indx; 3895 PetscScalar *values; 3896 3897 PetscFunctionBegin; 3898 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3899 if (scall == MAT_INITIAL_MATRIX){ 3900 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3901 if (n == PETSC_DECIDE){ 3902 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3903 } 3904 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3905 rstart -= m; 3906 3907 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3908 for (i=0;i<m;i++) { 3909 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3910 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3911 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3912 } 3913 /* This routine will ONLY return MPIAIJ type matrix */ 3914 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3915 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3916 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3917 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3918 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3919 3920 } else if (scall == MAT_REUSE_MATRIX){ 3921 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3922 } else { 3923 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3924 } 3925 3926 for (i=0;i<m;i++) { 3927 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3928 Ii = i + rstart; 3929 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3930 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3931 } 3932 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3933 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3934 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3935 3936 PetscFunctionReturn(0); 3937 } 3938 3939 #undef __FUNCT__ 3940 #define __FUNCT__ "MatFileSplit" 3941 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3942 { 3943 PetscErrorCode ierr; 3944 PetscMPIInt rank; 3945 PetscInt m,N,i,rstart,nnz; 3946 size_t len; 3947 const PetscInt *indx; 3948 PetscViewer out; 3949 char *name; 3950 Mat B; 3951 const PetscScalar *values; 3952 3953 PetscFunctionBegin; 3954 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3955 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3956 /* Should this be the type of the diagonal block of A? */ 3957 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3958 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3959 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3960 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3961 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3962 for (i=0;i<m;i++) { 3963 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3964 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3965 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3966 } 3967 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3968 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3969 3970 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3971 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3972 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3973 sprintf(name,"%s.%d",outfile,rank); 3974 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3975 ierr = PetscFree(name); 3976 ierr = MatView(B,out);CHKERRQ(ierr); 3977 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3978 ierr = MatDestroy(B);CHKERRQ(ierr); 3979 PetscFunctionReturn(0); 3980 } 3981 3982 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3983 #undef __FUNCT__ 3984 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3985 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3986 { 3987 PetscErrorCode ierr; 3988 Mat_Merge_SeqsToMPI *merge; 3989 PetscContainer container; 3990 3991 PetscFunctionBegin; 3992 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3993 if (container) { 3994 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3995 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3996 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3997 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3998 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3999 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4000 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4001 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4002 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4003 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4004 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4005 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4006 4007 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4008 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4009 } 4010 ierr = PetscFree(merge);CHKERRQ(ierr); 4011 4012 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4013 PetscFunctionReturn(0); 4014 } 4015 4016 #include "../src/mat/utils/freespace.h" 4017 #include "petscbt.h" 4018 4019 #undef __FUNCT__ 4020 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4021 /*@C 4022 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4023 matrices from each processor 4024 4025 Collective on MPI_Comm 4026 4027 Input Parameters: 4028 + comm - the communicators the parallel matrix will live on 4029 . seqmat - the input sequential matrices 4030 . m - number of local rows (or PETSC_DECIDE) 4031 . n - number of local columns (or PETSC_DECIDE) 4032 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4033 4034 Output Parameter: 4035 . mpimat - the parallel matrix generated 4036 4037 Level: advanced 4038 4039 Notes: 4040 The dimensions of the sequential matrix in each processor MUST be the same. 4041 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4042 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4043 @*/ 4044 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4045 { 4046 PetscErrorCode ierr; 4047 MPI_Comm comm=((PetscObject)mpimat)->comm; 4048 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4049 PetscMPIInt size,rank,taga,*len_s; 4050 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4051 PetscInt proc,m; 4052 PetscInt **buf_ri,**buf_rj; 4053 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4054 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4055 MPI_Request *s_waits,*r_waits; 4056 MPI_Status *status; 4057 MatScalar *aa=a->a; 4058 MatScalar **abuf_r,*ba_i; 4059 Mat_Merge_SeqsToMPI *merge; 4060 PetscContainer container; 4061 4062 PetscFunctionBegin; 4063 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4064 4065 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4066 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4067 4068 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4069 if (container) { 4070 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4071 } 4072 bi = merge->bi; 4073 bj = merge->bj; 4074 buf_ri = merge->buf_ri; 4075 buf_rj = merge->buf_rj; 4076 4077 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4078 owners = merge->rowmap->range; 4079 len_s = merge->len_s; 4080 4081 /* send and recv matrix values */ 4082 /*-----------------------------*/ 4083 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4084 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4085 4086 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4087 for (proc=0,k=0; proc<size; proc++){ 4088 if (!len_s[proc]) continue; 4089 i = owners[proc]; 4090 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4091 k++; 4092 } 4093 4094 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4095 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4096 ierr = PetscFree(status);CHKERRQ(ierr); 4097 4098 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4099 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4100 4101 /* insert mat values of mpimat */ 4102 /*----------------------------*/ 4103 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4104 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4105 nextrow = buf_ri_k + merge->nrecv; 4106 nextai = nextrow + merge->nrecv; 4107 4108 for (k=0; k<merge->nrecv; k++){ 4109 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4110 nrows = *(buf_ri_k[k]); 4111 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4112 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4113 } 4114 4115 /* set values of ba */ 4116 m = merge->rowmap->n; 4117 for (i=0; i<m; i++) { 4118 arow = owners[rank] + i; 4119 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4120 bnzi = bi[i+1] - bi[i]; 4121 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4122 4123 /* add local non-zero vals of this proc's seqmat into ba */ 4124 anzi = ai[arow+1] - ai[arow]; 4125 aj = a->j + ai[arow]; 4126 aa = a->a + ai[arow]; 4127 nextaj = 0; 4128 for (j=0; nextaj<anzi; j++){ 4129 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4130 ba_i[j] += aa[nextaj++]; 4131 } 4132 } 4133 4134 /* add received vals into ba */ 4135 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4136 /* i-th row */ 4137 if (i == *nextrow[k]) { 4138 anzi = *(nextai[k]+1) - *nextai[k]; 4139 aj = buf_rj[k] + *(nextai[k]); 4140 aa = abuf_r[k] + *(nextai[k]); 4141 nextaj = 0; 4142 for (j=0; nextaj<anzi; j++){ 4143 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4144 ba_i[j] += aa[nextaj++]; 4145 } 4146 } 4147 nextrow[k]++; nextai[k]++; 4148 } 4149 } 4150 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4151 } 4152 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4153 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4154 4155 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4156 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4157 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4158 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4159 PetscFunctionReturn(0); 4160 } 4161 4162 #undef __FUNCT__ 4163 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4164 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4165 { 4166 PetscErrorCode ierr; 4167 Mat B_mpi; 4168 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4169 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4170 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4171 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4172 PetscInt len,proc,*dnz,*onz; 4173 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4174 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4175 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4176 MPI_Status *status; 4177 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4178 PetscBT lnkbt; 4179 Mat_Merge_SeqsToMPI *merge; 4180 PetscContainer container; 4181 4182 PetscFunctionBegin; 4183 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4184 4185 /* make sure it is a PETSc comm */ 4186 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4187 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4188 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4189 4190 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4191 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4192 4193 /* determine row ownership */ 4194 /*---------------------------------------------------------*/ 4195 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4196 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4197 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4198 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4199 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4200 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4201 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4202 4203 m = merge->rowmap->n; 4204 M = merge->rowmap->N; 4205 owners = merge->rowmap->range; 4206 4207 /* determine the number of messages to send, their lengths */ 4208 /*---------------------------------------------------------*/ 4209 len_s = merge->len_s; 4210 4211 len = 0; /* length of buf_si[] */ 4212 merge->nsend = 0; 4213 for (proc=0; proc<size; proc++){ 4214 len_si[proc] = 0; 4215 if (proc == rank){ 4216 len_s[proc] = 0; 4217 } else { 4218 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4219 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4220 } 4221 if (len_s[proc]) { 4222 merge->nsend++; 4223 nrows = 0; 4224 for (i=owners[proc]; i<owners[proc+1]; i++){ 4225 if (ai[i+1] > ai[i]) nrows++; 4226 } 4227 len_si[proc] = 2*(nrows+1); 4228 len += len_si[proc]; 4229 } 4230 } 4231 4232 /* determine the number and length of messages to receive for ij-structure */ 4233 /*-------------------------------------------------------------------------*/ 4234 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4235 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4236 4237 /* post the Irecv of j-structure */ 4238 /*-------------------------------*/ 4239 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4240 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4241 4242 /* post the Isend of j-structure */ 4243 /*--------------------------------*/ 4244 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 4245 sj_waits = si_waits + merge->nsend; 4246 4247 for (proc=0, k=0; proc<size; proc++){ 4248 if (!len_s[proc]) continue; 4249 i = owners[proc]; 4250 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4251 k++; 4252 } 4253 4254 /* receives and sends of j-structure are complete */ 4255 /*------------------------------------------------*/ 4256 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4257 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4258 4259 /* send and recv i-structure */ 4260 /*---------------------------*/ 4261 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4262 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4263 4264 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4265 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4266 for (proc=0,k=0; proc<size; proc++){ 4267 if (!len_s[proc]) continue; 4268 /* form outgoing message for i-structure: 4269 buf_si[0]: nrows to be sent 4270 [1:nrows]: row index (global) 4271 [nrows+1:2*nrows+1]: i-structure index 4272 */ 4273 /*-------------------------------------------*/ 4274 nrows = len_si[proc]/2 - 1; 4275 buf_si_i = buf_si + nrows+1; 4276 buf_si[0] = nrows; 4277 buf_si_i[0] = 0; 4278 nrows = 0; 4279 for (i=owners[proc]; i<owners[proc+1]; i++){ 4280 anzi = ai[i+1] - ai[i]; 4281 if (anzi) { 4282 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4283 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4284 nrows++; 4285 } 4286 } 4287 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4288 k++; 4289 buf_si += len_si[proc]; 4290 } 4291 4292 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4293 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4294 4295 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4296 for (i=0; i<merge->nrecv; i++){ 4297 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4298 } 4299 4300 ierr = PetscFree(len_si);CHKERRQ(ierr); 4301 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4302 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4303 ierr = PetscFree(si_waits);CHKERRQ(ierr); 4304 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4305 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4306 ierr = PetscFree(status);CHKERRQ(ierr); 4307 4308 /* compute a local seq matrix in each processor */ 4309 /*----------------------------------------------*/ 4310 /* allocate bi array and free space for accumulating nonzero column info */ 4311 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4312 bi[0] = 0; 4313 4314 /* create and initialize a linked list */ 4315 nlnk = N+1; 4316 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4317 4318 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4319 len = 0; 4320 len = ai[owners[rank+1]] - ai[owners[rank]]; 4321 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4322 current_space = free_space; 4323 4324 /* determine symbolic info for each local row */ 4325 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 4326 nextrow = buf_ri_k + merge->nrecv; 4327 nextai = nextrow + merge->nrecv; 4328 for (k=0; k<merge->nrecv; k++){ 4329 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4330 nrows = *buf_ri_k[k]; 4331 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4332 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4333 } 4334 4335 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4336 len = 0; 4337 for (i=0;i<m;i++) { 4338 bnzi = 0; 4339 /* add local non-zero cols of this proc's seqmat into lnk */ 4340 arow = owners[rank] + i; 4341 anzi = ai[arow+1] - ai[arow]; 4342 aj = a->j + ai[arow]; 4343 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4344 bnzi += nlnk; 4345 /* add received col data into lnk */ 4346 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4347 if (i == *nextrow[k]) { /* i-th row */ 4348 anzi = *(nextai[k]+1) - *nextai[k]; 4349 aj = buf_rj[k] + *nextai[k]; 4350 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4351 bnzi += nlnk; 4352 nextrow[k]++; nextai[k]++; 4353 } 4354 } 4355 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4356 4357 /* if free space is not available, make more free space */ 4358 if (current_space->local_remaining<bnzi) { 4359 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4360 nspacedouble++; 4361 } 4362 /* copy data into free space, then initialize lnk */ 4363 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4364 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4365 4366 current_space->array += bnzi; 4367 current_space->local_used += bnzi; 4368 current_space->local_remaining -= bnzi; 4369 4370 bi[i+1] = bi[i] + bnzi; 4371 } 4372 4373 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 4374 4375 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4376 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4377 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4378 4379 /* create symbolic parallel matrix B_mpi */ 4380 /*---------------------------------------*/ 4381 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4382 if (n==PETSC_DECIDE) { 4383 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4384 } else { 4385 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4386 } 4387 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4388 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4389 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4390 4391 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4392 B_mpi->assembled = PETSC_FALSE; 4393 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4394 merge->bi = bi; 4395 merge->bj = bj; 4396 merge->buf_ri = buf_ri; 4397 merge->buf_rj = buf_rj; 4398 merge->coi = PETSC_NULL; 4399 merge->coj = PETSC_NULL; 4400 merge->owners_co = PETSC_NULL; 4401 4402 /* attach the supporting struct to B_mpi for reuse */ 4403 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4404 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4405 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4406 *mpimat = B_mpi; 4407 4408 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4409 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4410 PetscFunctionReturn(0); 4411 } 4412 4413 #undef __FUNCT__ 4414 #define __FUNCT__ "MatMerge_SeqsToMPI" 4415 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4416 { 4417 PetscErrorCode ierr; 4418 4419 PetscFunctionBegin; 4420 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4421 if (scall == MAT_INITIAL_MATRIX){ 4422 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4423 } 4424 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4425 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4426 PetscFunctionReturn(0); 4427 } 4428 4429 #undef __FUNCT__ 4430 #define __FUNCT__ "MatGetLocalMat" 4431 /*@ 4432 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4433 4434 Not Collective 4435 4436 Input Parameters: 4437 + A - the matrix 4438 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4439 4440 Output Parameter: 4441 . A_loc - the local sequential matrix generated 4442 4443 Level: developer 4444 4445 @*/ 4446 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4447 { 4448 PetscErrorCode ierr; 4449 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4450 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4451 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4452 MatScalar *aa=a->a,*ba=b->a,*cam; 4453 PetscScalar *ca; 4454 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4455 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4456 4457 PetscFunctionBegin; 4458 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4459 if (scall == MAT_INITIAL_MATRIX){ 4460 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4461 ci[0] = 0; 4462 for (i=0; i<am; i++){ 4463 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4464 } 4465 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4466 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4467 k = 0; 4468 for (i=0; i<am; i++) { 4469 ncols_o = bi[i+1] - bi[i]; 4470 ncols_d = ai[i+1] - ai[i]; 4471 /* off-diagonal portion of A */ 4472 for (jo=0; jo<ncols_o; jo++) { 4473 col = cmap[*bj]; 4474 if (col >= cstart) break; 4475 cj[k] = col; bj++; 4476 ca[k++] = *ba++; 4477 } 4478 /* diagonal portion of A */ 4479 for (j=0; j<ncols_d; j++) { 4480 cj[k] = cstart + *aj++; 4481 ca[k++] = *aa++; 4482 } 4483 /* off-diagonal portion of A */ 4484 for (j=jo; j<ncols_o; j++) { 4485 cj[k] = cmap[*bj++]; 4486 ca[k++] = *ba++; 4487 } 4488 } 4489 /* put together the new matrix */ 4490 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4491 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4492 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4493 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4494 mat->free_a = PETSC_TRUE; 4495 mat->free_ij = PETSC_TRUE; 4496 mat->nonew = 0; 4497 } else if (scall == MAT_REUSE_MATRIX){ 4498 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4499 ci = mat->i; cj = mat->j; cam = mat->a; 4500 for (i=0; i<am; i++) { 4501 /* off-diagonal portion of A */ 4502 ncols_o = bi[i+1] - bi[i]; 4503 for (jo=0; jo<ncols_o; jo++) { 4504 col = cmap[*bj]; 4505 if (col >= cstart) break; 4506 *cam++ = *ba++; bj++; 4507 } 4508 /* diagonal portion of A */ 4509 ncols_d = ai[i+1] - ai[i]; 4510 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4511 /* off-diagonal portion of A */ 4512 for (j=jo; j<ncols_o; j++) { 4513 *cam++ = *ba++; bj++; 4514 } 4515 } 4516 } else { 4517 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4518 } 4519 4520 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4521 PetscFunctionReturn(0); 4522 } 4523 4524 #undef __FUNCT__ 4525 #define __FUNCT__ "MatGetLocalMatCondensed" 4526 /*@C 4527 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4528 4529 Not Collective 4530 4531 Input Parameters: 4532 + A - the matrix 4533 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4534 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4535 4536 Output Parameter: 4537 . A_loc - the local sequential matrix generated 4538 4539 Level: developer 4540 4541 @*/ 4542 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4543 { 4544 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4545 PetscErrorCode ierr; 4546 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4547 IS isrowa,iscola; 4548 Mat *aloc; 4549 4550 PetscFunctionBegin; 4551 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4552 if (!row){ 4553 start = A->rmap->rstart; end = A->rmap->rend; 4554 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4555 } else { 4556 isrowa = *row; 4557 } 4558 if (!col){ 4559 start = A->cmap->rstart; 4560 cmap = a->garray; 4561 nzA = a->A->cmap->n; 4562 nzB = a->B->cmap->n; 4563 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4564 ncols = 0; 4565 for (i=0; i<nzB; i++) { 4566 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4567 else break; 4568 } 4569 imark = i; 4570 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4571 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4572 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4573 ierr = PetscFree(idx);CHKERRQ(ierr); 4574 } else { 4575 iscola = *col; 4576 } 4577 if (scall != MAT_INITIAL_MATRIX){ 4578 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4579 aloc[0] = *A_loc; 4580 } 4581 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4582 *A_loc = aloc[0]; 4583 ierr = PetscFree(aloc);CHKERRQ(ierr); 4584 if (!row){ 4585 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4586 } 4587 if (!col){ 4588 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4589 } 4590 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4591 PetscFunctionReturn(0); 4592 } 4593 4594 #undef __FUNCT__ 4595 #define __FUNCT__ "MatGetBrowsOfAcols" 4596 /*@C 4597 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4598 4599 Collective on Mat 4600 4601 Input Parameters: 4602 + A,B - the matrices in mpiaij format 4603 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4604 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4605 4606 Output Parameter: 4607 + rowb, colb - index sets of rows and columns of B to extract 4608 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4609 - B_seq - the sequential matrix generated 4610 4611 Level: developer 4612 4613 @*/ 4614 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4615 { 4616 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4617 PetscErrorCode ierr; 4618 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4619 IS isrowb,iscolb; 4620 Mat *bseq; 4621 4622 PetscFunctionBegin; 4623 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4624 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4625 } 4626 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4627 4628 if (scall == MAT_INITIAL_MATRIX){ 4629 start = A->cmap->rstart; 4630 cmap = a->garray; 4631 nzA = a->A->cmap->n; 4632 nzB = a->B->cmap->n; 4633 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4634 ncols = 0; 4635 for (i=0; i<nzB; i++) { /* row < local row index */ 4636 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4637 else break; 4638 } 4639 imark = i; 4640 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4641 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4642 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4643 ierr = PetscFree(idx);CHKERRQ(ierr); 4644 *brstart = imark; 4645 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4646 } else { 4647 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4648 isrowb = *rowb; iscolb = *colb; 4649 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4650 bseq[0] = *B_seq; 4651 } 4652 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4653 *B_seq = bseq[0]; 4654 ierr = PetscFree(bseq);CHKERRQ(ierr); 4655 if (!rowb){ 4656 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4657 } else { 4658 *rowb = isrowb; 4659 } 4660 if (!colb){ 4661 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4662 } else { 4663 *colb = iscolb; 4664 } 4665 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4666 PetscFunctionReturn(0); 4667 } 4668 4669 #undef __FUNCT__ 4670 #define __FUNCT__ "MatGetBrowsOfAoCols" 4671 /*@C 4672 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4673 of the OFF-DIAGONAL portion of local A 4674 4675 Collective on Mat 4676 4677 Input Parameters: 4678 + A,B - the matrices in mpiaij format 4679 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4680 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4681 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4682 4683 Output Parameter: 4684 + B_oth - the sequential matrix generated 4685 4686 Level: developer 4687 4688 @*/ 4689 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth) 4690 { 4691 VecScatter_MPI_General *gen_to,*gen_from; 4692 PetscErrorCode ierr; 4693 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4694 Mat_SeqAIJ *b_oth; 4695 VecScatter ctx=a->Mvctx; 4696 MPI_Comm comm=((PetscObject)ctx)->comm; 4697 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4698 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4699 PetscScalar *rvalues,*svalues; 4700 MatScalar *b_otha,*bufa,*bufA; 4701 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4702 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4703 MPI_Status *sstatus,rstatus; 4704 PetscMPIInt jj; 4705 PetscInt *cols,sbs,rbs; 4706 PetscScalar *vals; 4707 4708 PetscFunctionBegin; 4709 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4710 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4711 } 4712 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4713 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4714 4715 gen_to = (VecScatter_MPI_General*)ctx->todata; 4716 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4717 rvalues = gen_from->values; /* holds the length of receiving row */ 4718 svalues = gen_to->values; /* holds the length of sending row */ 4719 nrecvs = gen_from->n; 4720 nsends = gen_to->n; 4721 4722 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4723 srow = gen_to->indices; /* local row index to be sent */ 4724 sstarts = gen_to->starts; 4725 sprocs = gen_to->procs; 4726 sstatus = gen_to->sstatus; 4727 sbs = gen_to->bs; 4728 rstarts = gen_from->starts; 4729 rprocs = gen_from->procs; 4730 rbs = gen_from->bs; 4731 4732 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4733 if (scall == MAT_INITIAL_MATRIX){ 4734 /* i-array */ 4735 /*---------*/ 4736 /* post receives */ 4737 for (i=0; i<nrecvs; i++){ 4738 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4739 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4740 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4741 } 4742 4743 /* pack the outgoing message */ 4744 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 4745 rstartsj = sstartsj + nsends +1; 4746 sstartsj[0] = 0; rstartsj[0] = 0; 4747 len = 0; /* total length of j or a array to be sent */ 4748 k = 0; 4749 for (i=0; i<nsends; i++){ 4750 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4751 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4752 for (j=0; j<nrows; j++) { 4753 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4754 for (l=0; l<sbs; l++){ 4755 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4756 rowlen[j*sbs+l] = ncols; 4757 len += ncols; 4758 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4759 } 4760 k++; 4761 } 4762 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4763 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4764 } 4765 /* recvs and sends of i-array are completed */ 4766 i = nrecvs; 4767 while (i--) { 4768 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4769 } 4770 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4771 4772 /* allocate buffers for sending j and a arrays */ 4773 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4774 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4775 4776 /* create i-array of B_oth */ 4777 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4778 b_othi[0] = 0; 4779 len = 0; /* total length of j or a array to be received */ 4780 k = 0; 4781 for (i=0; i<nrecvs; i++){ 4782 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4783 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4784 for (j=0; j<nrows; j++) { 4785 b_othi[k+1] = b_othi[k] + rowlen[j]; 4786 len += rowlen[j]; k++; 4787 } 4788 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4789 } 4790 4791 /* allocate space for j and a arrrays of B_oth */ 4792 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4793 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4794 4795 /* j-array */ 4796 /*---------*/ 4797 /* post receives of j-array */ 4798 for (i=0; i<nrecvs; i++){ 4799 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4800 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4801 } 4802 4803 /* pack the outgoing message j-array */ 4804 k = 0; 4805 for (i=0; i<nsends; i++){ 4806 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4807 bufJ = bufj+sstartsj[i]; 4808 for (j=0; j<nrows; j++) { 4809 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4810 for (ll=0; ll<sbs; ll++){ 4811 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4812 for (l=0; l<ncols; l++){ 4813 *bufJ++ = cols[l]; 4814 } 4815 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4816 } 4817 } 4818 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4819 } 4820 4821 /* recvs and sends of j-array are completed */ 4822 i = nrecvs; 4823 while (i--) { 4824 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4825 } 4826 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4827 } else if (scall == MAT_REUSE_MATRIX){ 4828 sstartsj = *startsj; 4829 rstartsj = sstartsj + nsends +1; 4830 bufa = *bufa_ptr; 4831 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4832 b_otha = b_oth->a; 4833 } else { 4834 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4835 } 4836 4837 /* a-array */ 4838 /*---------*/ 4839 /* post receives of a-array */ 4840 for (i=0; i<nrecvs; i++){ 4841 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4842 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4843 } 4844 4845 /* pack the outgoing message a-array */ 4846 k = 0; 4847 for (i=0; i<nsends; i++){ 4848 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4849 bufA = bufa+sstartsj[i]; 4850 for (j=0; j<nrows; j++) { 4851 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4852 for (ll=0; ll<sbs; ll++){ 4853 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4854 for (l=0; l<ncols; l++){ 4855 *bufA++ = vals[l]; 4856 } 4857 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4858 } 4859 } 4860 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4861 } 4862 /* recvs and sends of a-array are completed */ 4863 i = nrecvs; 4864 while (i--) { 4865 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4866 } 4867 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4868 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4869 4870 if (scall == MAT_INITIAL_MATRIX){ 4871 /* put together the new matrix */ 4872 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4873 4874 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4875 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4876 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4877 b_oth->free_a = PETSC_TRUE; 4878 b_oth->free_ij = PETSC_TRUE; 4879 b_oth->nonew = 0; 4880 4881 ierr = PetscFree(bufj);CHKERRQ(ierr); 4882 if (!startsj || !bufa_ptr){ 4883 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 4884 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4885 } else { 4886 *startsj = sstartsj; 4887 *bufa_ptr = bufa; 4888 } 4889 } 4890 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4891 PetscFunctionReturn(0); 4892 } 4893 4894 #undef __FUNCT__ 4895 #define __FUNCT__ "MatGetCommunicationStructs" 4896 /*@C 4897 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4898 4899 Not Collective 4900 4901 Input Parameters: 4902 . A - The matrix in mpiaij format 4903 4904 Output Parameter: 4905 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4906 . colmap - A map from global column index to local index into lvec 4907 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4908 4909 Level: developer 4910 4911 @*/ 4912 #if defined (PETSC_USE_CTABLE) 4913 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4914 #else 4915 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4916 #endif 4917 { 4918 Mat_MPIAIJ *a; 4919 4920 PetscFunctionBegin; 4921 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4922 PetscValidPointer(lvec, 2) 4923 PetscValidPointer(colmap, 3) 4924 PetscValidPointer(multScatter, 4) 4925 a = (Mat_MPIAIJ *) A->data; 4926 if (lvec) *lvec = a->lvec; 4927 if (colmap) *colmap = a->colmap; 4928 if (multScatter) *multScatter = a->Mvctx; 4929 PetscFunctionReturn(0); 4930 } 4931 4932 EXTERN_C_BEGIN 4933 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4934 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4935 EXTERN_C_END 4936 4937 #undef __FUNCT__ 4938 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4939 /* 4940 Computes (B'*A')' since computing B*A directly is untenable 4941 4942 n p p 4943 ( ) ( ) ( ) 4944 m ( A ) * n ( B ) = m ( C ) 4945 ( ) ( ) ( ) 4946 4947 */ 4948 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4949 { 4950 PetscErrorCode ierr; 4951 Mat At,Bt,Ct; 4952 4953 PetscFunctionBegin; 4954 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4955 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4956 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4957 ierr = MatDestroy(At);CHKERRQ(ierr); 4958 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4959 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4960 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4961 PetscFunctionReturn(0); 4962 } 4963 4964 #undef __FUNCT__ 4965 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4966 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4967 { 4968 PetscErrorCode ierr; 4969 PetscInt m=A->rmap->n,n=B->cmap->n; 4970 Mat Cmat; 4971 4972 PetscFunctionBegin; 4973 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4974 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4975 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4976 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4977 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4978 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4979 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4980 *C = Cmat; 4981 PetscFunctionReturn(0); 4982 } 4983 4984 /* ----------------------------------------------------------------*/ 4985 #undef __FUNCT__ 4986 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4987 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4988 { 4989 PetscErrorCode ierr; 4990 4991 PetscFunctionBegin; 4992 if (scall == MAT_INITIAL_MATRIX){ 4993 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4994 } 4995 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4996 PetscFunctionReturn(0); 4997 } 4998 4999 EXTERN_C_BEGIN 5000 #if defined(PETSC_HAVE_MUMPS) 5001 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 5002 #endif 5003 #if defined(PETSC_HAVE_PASTIX) 5004 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5005 #endif 5006 #if defined(PETSC_HAVE_SUPERLU_DIST) 5007 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5008 #endif 5009 #if defined(PETSC_HAVE_SPOOLES) 5010 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5011 #endif 5012 EXTERN_C_END 5013 5014 /*MC 5015 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5016 5017 Options Database Keys: 5018 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5019 5020 Level: beginner 5021 5022 .seealso: MatCreateMPIAIJ() 5023 M*/ 5024 5025 EXTERN_C_BEGIN 5026 #undef __FUNCT__ 5027 #define __FUNCT__ "MatCreate_MPIAIJ" 5028 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5029 { 5030 Mat_MPIAIJ *b; 5031 PetscErrorCode ierr; 5032 PetscMPIInt size; 5033 5034 PetscFunctionBegin; 5035 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5036 5037 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5038 B->data = (void*)b; 5039 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5040 B->rmap->bs = 1; 5041 B->assembled = PETSC_FALSE; 5042 B->mapping = 0; 5043 5044 B->insertmode = NOT_SET_VALUES; 5045 b->size = size; 5046 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5047 5048 /* build cache for off array entries formed */ 5049 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5050 b->donotstash = PETSC_FALSE; 5051 b->colmap = 0; 5052 b->garray = 0; 5053 b->roworiented = PETSC_TRUE; 5054 5055 /* stuff used for matrix vector multiply */ 5056 b->lvec = PETSC_NULL; 5057 b->Mvctx = PETSC_NULL; 5058 5059 /* stuff for MatGetRow() */ 5060 b->rowindices = 0; 5061 b->rowvalues = 0; 5062 b->getrowactive = PETSC_FALSE; 5063 5064 #if defined(PETSC_HAVE_SPOOLES) 5065 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5066 "MatGetFactor_mpiaij_spooles", 5067 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5068 #endif 5069 #if defined(PETSC_HAVE_MUMPS) 5070 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5071 "MatGetFactor_mpiaij_mumps", 5072 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5073 #endif 5074 #if defined(PETSC_HAVE_PASTIX) 5075 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5076 "MatGetFactor_mpiaij_pastix", 5077 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5078 #endif 5079 #if defined(PETSC_HAVE_SUPERLU_DIST) 5080 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5081 "MatGetFactor_mpiaij_superlu_dist", 5082 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5083 #endif 5084 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5085 "MatStoreValues_MPIAIJ", 5086 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5087 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5088 "MatRetrieveValues_MPIAIJ", 5089 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5090 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5091 "MatGetDiagonalBlock_MPIAIJ", 5092 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5093 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5094 "MatIsTranspose_MPIAIJ", 5095 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5096 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5097 "MatMPIAIJSetPreallocation_MPIAIJ", 5098 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5099 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5100 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5101 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5102 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5103 "MatDiagonalScaleLocal_MPIAIJ", 5104 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5105 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5106 "MatConvert_MPIAIJ_MPICSRPERM", 5107 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5108 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5109 "MatConvert_MPIAIJ_MPICRL", 5110 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5111 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5112 "MatConvert_MPIAIJ_MPISBAIJ", 5113 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5114 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5115 "MatMatMult_MPIDense_MPIAIJ", 5116 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5117 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5118 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5119 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5120 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5121 "MatMatMultNumeric_MPIDense_MPIAIJ", 5122 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5123 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5124 PetscFunctionReturn(0); 5125 } 5126 EXTERN_C_END 5127 5128 #undef __FUNCT__ 5129 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5130 /*@ 5131 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5132 and "off-diagonal" part of the matrix in CSR format. 5133 5134 Collective on MPI_Comm 5135 5136 Input Parameters: 5137 + comm - MPI communicator 5138 . m - number of local rows (Cannot be PETSC_DECIDE) 5139 . n - This value should be the same as the local size used in creating the 5140 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5141 calculated if N is given) For square matrices n is almost always m. 5142 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5143 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5144 . i - row indices for "diagonal" portion of matrix 5145 . j - column indices 5146 . a - matrix values 5147 . oi - row indices for "off-diagonal" portion of matrix 5148 . oj - column indices 5149 - oa - matrix values 5150 5151 Output Parameter: 5152 . mat - the matrix 5153 5154 Level: advanced 5155 5156 Notes: 5157 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5158 5159 The i and j indices are 0 based 5160 5161 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5162 5163 This sets local rows and cannot be used to set off-processor values. 5164 5165 You cannot later use MatSetValues() to change values in this matrix. 5166 5167 .keywords: matrix, aij, compressed row, sparse, parallel 5168 5169 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5170 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5171 @*/ 5172 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5173 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5174 { 5175 PetscErrorCode ierr; 5176 Mat_MPIAIJ *maij; 5177 5178 PetscFunctionBegin; 5179 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5180 if (i[0]) { 5181 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5182 } 5183 if (oi[0]) { 5184 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5185 } 5186 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5187 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5188 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5189 maij = (Mat_MPIAIJ*) (*mat)->data; 5190 maij->donotstash = PETSC_TRUE; 5191 (*mat)->preallocated = PETSC_TRUE; 5192 5193 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5194 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5195 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5196 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5197 5198 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5199 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5200 5201 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5202 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5203 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5204 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5205 5206 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5207 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5208 PetscFunctionReturn(0); 5209 } 5210 5211 /* 5212 Special version for direct calls from Fortran 5213 */ 5214 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5215 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5216 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5217 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5218 #endif 5219 5220 /* Change these macros so can be used in void function */ 5221 #undef CHKERRQ 5222 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5223 #undef SETERRQ2 5224 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5225 #undef SETERRQ 5226 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5227 5228 EXTERN_C_BEGIN 5229 #undef __FUNCT__ 5230 #define __FUNCT__ "matsetvaluesmpiaij_" 5231 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5232 { 5233 Mat mat = *mmat; 5234 PetscInt m = *mm, n = *mn; 5235 InsertMode addv = *maddv; 5236 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5237 PetscScalar value; 5238 PetscErrorCode ierr; 5239 5240 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5241 if (mat->insertmode == NOT_SET_VALUES) { 5242 mat->insertmode = addv; 5243 } 5244 #if defined(PETSC_USE_DEBUG) 5245 else if (mat->insertmode != addv) { 5246 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5247 } 5248 #endif 5249 { 5250 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5251 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5252 PetscTruth roworiented = aij->roworiented; 5253 5254 /* Some Variables required in the macro */ 5255 Mat A = aij->A; 5256 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5257 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5258 MatScalar *aa = a->a; 5259 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5260 Mat B = aij->B; 5261 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5262 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5263 MatScalar *ba = b->a; 5264 5265 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5266 PetscInt nonew = a->nonew; 5267 MatScalar *ap1,*ap2; 5268 5269 PetscFunctionBegin; 5270 for (i=0; i<m; i++) { 5271 if (im[i] < 0) continue; 5272 #if defined(PETSC_USE_DEBUG) 5273 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5274 #endif 5275 if (im[i] >= rstart && im[i] < rend) { 5276 row = im[i] - rstart; 5277 lastcol1 = -1; 5278 rp1 = aj + ai[row]; 5279 ap1 = aa + ai[row]; 5280 rmax1 = aimax[row]; 5281 nrow1 = ailen[row]; 5282 low1 = 0; 5283 high1 = nrow1; 5284 lastcol2 = -1; 5285 rp2 = bj + bi[row]; 5286 ap2 = ba + bi[row]; 5287 rmax2 = bimax[row]; 5288 nrow2 = bilen[row]; 5289 low2 = 0; 5290 high2 = nrow2; 5291 5292 for (j=0; j<n; j++) { 5293 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5294 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5295 if (in[j] >= cstart && in[j] < cend){ 5296 col = in[j] - cstart; 5297 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5298 } else if (in[j] < 0) continue; 5299 #if defined(PETSC_USE_DEBUG) 5300 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5301 #endif 5302 else { 5303 if (mat->was_assembled) { 5304 if (!aij->colmap) { 5305 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5306 } 5307 #if defined (PETSC_USE_CTABLE) 5308 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5309 col--; 5310 #else 5311 col = aij->colmap[in[j]] - 1; 5312 #endif 5313 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5314 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5315 col = in[j]; 5316 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5317 B = aij->B; 5318 b = (Mat_SeqAIJ*)B->data; 5319 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5320 rp2 = bj + bi[row]; 5321 ap2 = ba + bi[row]; 5322 rmax2 = bimax[row]; 5323 nrow2 = bilen[row]; 5324 low2 = 0; 5325 high2 = nrow2; 5326 bm = aij->B->rmap->n; 5327 ba = b->a; 5328 } 5329 } else col = in[j]; 5330 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5331 } 5332 } 5333 } else { 5334 if (!aij->donotstash) { 5335 if (roworiented) { 5336 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5337 } else { 5338 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5339 } 5340 } 5341 } 5342 }} 5343 PetscFunctionReturnVoid(); 5344 } 5345 EXTERN_C_END 5346 5347