1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatDistribute_MPIAIJ" 7 /* 8 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 9 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 10 11 Only for square matrices 12 */ 13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 14 { 15 PetscMPIInt rank,size; 16 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 17 PetscErrorCode ierr; 18 Mat mat; 19 Mat_SeqAIJ *gmata; 20 PetscMPIInt tag; 21 MPI_Status status; 22 PetscTruth aij; 23 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 24 25 PetscFunctionBegin; 26 CHKMEMQ; 27 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 28 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 29 if (!rank) { 30 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 31 if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 32 } 33 if (reuse == MAT_INITIAL_MATRIX) { 34 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 35 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 36 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 37 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 38 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 39 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 40 rowners[0] = 0; 41 for (i=2; i<=size; i++) { 42 rowners[i] += rowners[i-1]; 43 } 44 rstart = rowners[rank]; 45 rend = rowners[rank+1]; 46 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 47 if (!rank) { 48 gmata = (Mat_SeqAIJ*) gmat->data; 49 /* send row lengths to all processors */ 50 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 51 for (i=1; i<size; i++) { 52 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 53 } 54 /* determine number diagonal and off-diagonal counts */ 55 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 56 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 57 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 58 jj = 0; 59 for (i=0; i<m; i++) { 60 for (j=0; j<dlens[i]; j++) { 61 if (gmata->j[jj] < rstart) ld[i]++; 62 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 63 jj++; 64 } 65 } 66 /* send column indices to other processes */ 67 for (i=1; i<size; i++) { 68 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 69 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 70 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 } 72 73 /* send numerical values to other processes */ 74 for (i=1; i<size; i++) { 75 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 76 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 77 } 78 gmataa = gmata->a; 79 gmataj = gmata->j; 80 81 } else { 82 /* receive row lengths */ 83 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 84 /* receive column indices */ 85 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 86 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 87 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 88 /* determine number diagonal and off-diagonal counts */ 89 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 90 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 91 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 92 jj = 0; 93 for (i=0; i<m; i++) { 94 for (j=0; j<dlens[i]; j++) { 95 if (gmataj[jj] < rstart) ld[i]++; 96 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 97 jj++; 98 } 99 } 100 /* receive numerical values */ 101 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 103 } 104 /* set preallocation */ 105 for (i=0; i<m; i++) { 106 dlens[i] -= olens[i]; 107 } 108 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 109 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 110 111 for (i=0; i<m; i++) { 112 dlens[i] += olens[i]; 113 } 114 cnt = 0; 115 for (i=0; i<m; i++) { 116 row = rstart + i; 117 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 118 cnt += dlens[i]; 119 } 120 if (rank) { 121 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 122 } 123 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 124 ierr = PetscFree(rowners);CHKERRQ(ierr); 125 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 126 *inmat = mat; 127 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 128 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 129 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 130 mat = *inmat; 131 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 132 if (!rank) { 133 /* send numerical values to other processes */ 134 gmata = (Mat_SeqAIJ*) gmat->data; 135 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 136 gmataa = gmata->a; 137 for (i=1; i<size; i++) { 138 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 139 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 140 } 141 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 142 } else { 143 /* receive numerical values from process 0*/ 144 nz = Ad->nz + Ao->nz; 145 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 146 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 147 } 148 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 149 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 150 ad = Ad->a; 151 ao = Ao->a; 152 if (mat->rmap->n) { 153 i = 0; 154 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 155 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 156 } 157 for (i=1; i<mat->rmap->n; i++) { 158 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 159 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 160 } 161 i--; 162 if (mat->rmap->n) { 163 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 164 } 165 if (rank) { 166 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 167 } 168 } 169 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 170 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 CHKMEMQ; 172 PetscFunctionReturn(0); 173 } 174 175 /* 176 Local utility routine that creates a mapping from the global column 177 number to the local number in the off-diagonal part of the local 178 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 179 a slightly higher hash table cost; without it it is not scalable (each processor 180 has an order N integer array but is fast to acess. 181 */ 182 #undef __FUNCT__ 183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 185 { 186 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 187 PetscErrorCode ierr; 188 PetscInt n = aij->B->cmap->n,i; 189 190 PetscFunctionBegin; 191 #if defined (PETSC_USE_CTABLE) 192 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 193 for (i=0; i<n; i++){ 194 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 195 } 196 #else 197 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 198 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 199 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 200 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 201 #endif 202 PetscFunctionReturn(0); 203 } 204 205 206 #define CHUNKSIZE 15 207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 208 { \ 209 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 210 lastcol1 = col;\ 211 while (high1-low1 > 5) { \ 212 t = (low1+high1)/2; \ 213 if (rp1[t] > col) high1 = t; \ 214 else low1 = t; \ 215 } \ 216 for (_i=low1; _i<high1; _i++) { \ 217 if (rp1[_i] > col) break; \ 218 if (rp1[_i] == col) { \ 219 if (addv == ADD_VALUES) ap1[_i] += value; \ 220 else ap1[_i] = value; \ 221 goto a_noinsert; \ 222 } \ 223 } \ 224 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 225 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 226 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 227 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 228 N = nrow1++ - 1; a->nz++; high1++; \ 229 /* shift up all the later entries in this row */ \ 230 for (ii=N; ii>=_i; ii--) { \ 231 rp1[ii+1] = rp1[ii]; \ 232 ap1[ii+1] = ap1[ii]; \ 233 } \ 234 rp1[_i] = col; \ 235 ap1[_i] = value; \ 236 a_noinsert: ; \ 237 ailen[row] = nrow1; \ 238 } 239 240 241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 242 { \ 243 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 244 lastcol2 = col;\ 245 while (high2-low2 > 5) { \ 246 t = (low2+high2)/2; \ 247 if (rp2[t] > col) high2 = t; \ 248 else low2 = t; \ 249 } \ 250 for (_i=low2; _i<high2; _i++) { \ 251 if (rp2[_i] > col) break; \ 252 if (rp2[_i] == col) { \ 253 if (addv == ADD_VALUES) ap2[_i] += value; \ 254 else ap2[_i] = value; \ 255 goto b_noinsert; \ 256 } \ 257 } \ 258 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 259 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 260 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 261 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 262 N = nrow2++ - 1; b->nz++; high2++; \ 263 /* shift up all the later entries in this row */ \ 264 for (ii=N; ii>=_i; ii--) { \ 265 rp2[ii+1] = rp2[ii]; \ 266 ap2[ii+1] = ap2[ii]; \ 267 } \ 268 rp2[_i] = col; \ 269 ap2[_i] = value; \ 270 b_noinsert: ; \ 271 bilen[row] = nrow2; \ 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 277 { 278 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 279 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 280 PetscErrorCode ierr; 281 PetscInt l,*garray = mat->garray,diag; 282 283 PetscFunctionBegin; 284 /* code only works for square matrices A */ 285 286 /* find size of row to the left of the diagonal part */ 287 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 288 row = row - diag; 289 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 290 if (garray[b->j[b->i[row]+l]] > diag) break; 291 } 292 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 293 294 /* diagonal part */ 295 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 296 297 /* right of diagonal part */ 298 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 #undef __FUNCT__ 303 #define __FUNCT__ "MatSetValues_MPIAIJ" 304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 305 { 306 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 307 PetscScalar value; 308 PetscErrorCode ierr; 309 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 310 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 311 PetscTruth roworiented = aij->roworiented; 312 313 /* Some Variables required in the macro */ 314 Mat A = aij->A; 315 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 316 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 317 MatScalar *aa = a->a; 318 PetscTruth ignorezeroentries = a->ignorezeroentries; 319 Mat B = aij->B; 320 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 321 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 322 MatScalar *ba = b->a; 323 324 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 325 PetscInt nonew = a->nonew; 326 MatScalar *ap1,*ap2; 327 328 PetscFunctionBegin; 329 if (v) PetscValidScalarPointer(v,6); 330 for (i=0; i<m; i++) { 331 if (im[i] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 334 #endif 335 if (im[i] >= rstart && im[i] < rend) { 336 row = im[i] - rstart; 337 lastcol1 = -1; 338 rp1 = aj + ai[row]; 339 ap1 = aa + ai[row]; 340 rmax1 = aimax[row]; 341 nrow1 = ailen[row]; 342 low1 = 0; 343 high1 = nrow1; 344 lastcol2 = -1; 345 rp2 = bj + bi[row]; 346 ap2 = ba + bi[row]; 347 rmax2 = bimax[row]; 348 nrow2 = bilen[row]; 349 low2 = 0; 350 high2 = nrow2; 351 352 for (j=0; j<n; j++) { 353 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 354 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 355 if (in[j] >= cstart && in[j] < cend){ 356 col = in[j] - cstart; 357 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 358 } else if (in[j] < 0) continue; 359 #if defined(PETSC_USE_DEBUG) 360 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 361 #endif 362 else { 363 if (mat->was_assembled) { 364 if (!aij->colmap) { 365 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 366 } 367 #if defined (PETSC_USE_CTABLE) 368 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 369 col--; 370 #else 371 col = aij->colmap[in[j]] - 1; 372 #endif 373 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 374 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 375 col = in[j]; 376 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 377 B = aij->B; 378 b = (Mat_SeqAIJ*)B->data; 379 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 380 rp2 = bj + bi[row]; 381 ap2 = ba + bi[row]; 382 rmax2 = bimax[row]; 383 nrow2 = bilen[row]; 384 low2 = 0; 385 high2 = nrow2; 386 bm = aij->B->rmap->n; 387 ba = b->a; 388 } 389 } else col = in[j]; 390 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 391 } 392 } 393 } else { 394 if (!aij->donotstash) { 395 if (roworiented) { 396 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 397 } else { 398 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 399 } 400 } 401 } 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatGetValues_MPIAIJ" 408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 413 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 414 415 PetscFunctionBegin; 416 for (i=0; i<m; i++) { 417 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 418 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 419 if (idxm[i] >= rstart && idxm[i] < rend) { 420 row = idxm[i] - rstart; 421 for (j=0; j<n; j++) { 422 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 423 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 424 if (idxn[j] >= cstart && idxn[j] < cend){ 425 col = idxn[j] - cstart; 426 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 427 } else { 428 if (!aij->colmap) { 429 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 430 } 431 #if defined (PETSC_USE_CTABLE) 432 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 433 col --; 434 #else 435 col = aij->colmap[idxn[j]] - 1; 436 #endif 437 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 438 else { 439 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 440 } 441 } 442 } 443 } else { 444 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 445 } 446 } 447 PetscFunctionReturn(0); 448 } 449 450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash) { 463 PetscFunctionReturn(0); 464 } 465 466 /* make sure all processors are either in INSERTMODE or ADDMODE */ 467 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 468 if (addv == (ADD_VALUES|INSERT_VALUES)) { 469 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 470 } 471 mat->insertmode = addv; /* in case this processor had no cache */ 472 473 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 474 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 475 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 485 PetscErrorCode ierr; 486 PetscMPIInt n; 487 PetscInt i,j,rstart,ncols,flg; 488 PetscInt *row,*col; 489 PetscTruth other_disassembled; 490 PetscScalar *val; 491 InsertMode addv = mat->insertmode; 492 493 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 494 PetscFunctionBegin; 495 if (!aij->donotstash) { 496 while (1) { 497 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 498 if (!flg) break; 499 500 for (i=0; i<n;) { 501 /* Now identify the consecutive vals belonging to the same row */ 502 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 503 if (j < n) ncols = j-i; 504 else ncols = n-i; 505 /* Now assemble all these values with a single function call */ 506 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 507 i = j; 508 } 509 } 510 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 511 } 512 a->compressedrow.use = PETSC_FALSE; 513 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 514 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 515 516 /* determine if any processor has disassembled, if so we must 517 also disassemble ourselfs, in order that we may reassemble. */ 518 /* 519 if nonzero structure of submatrix B cannot change then we know that 520 no processor disassembled thus we can skip this stuff 521 */ 522 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 523 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 524 if (mat->was_assembled && !other_disassembled) { 525 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 } 527 } 528 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 529 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 530 } 531 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 532 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 533 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 534 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 535 536 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 537 aij->rowvalues = 0; 538 539 /* used by MatAXPY() */ 540 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 541 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 542 543 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 544 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 545 PetscFunctionReturn(0); 546 } 547 548 #undef __FUNCT__ 549 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 550 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 551 { 552 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 553 PetscErrorCode ierr; 554 555 PetscFunctionBegin; 556 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 557 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatZeroRows_MPIAIJ" 563 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 564 { 565 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 566 PetscErrorCode ierr; 567 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 568 PetscInt i,*owners = A->rmap->range; 569 PetscInt *nprocs,j,idx,nsends,row; 570 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 571 PetscInt *rvalues,count,base,slen,*source; 572 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 573 MPI_Comm comm = ((PetscObject)A)->comm; 574 MPI_Request *send_waits,*recv_waits; 575 MPI_Status recv_status,*send_status; 576 #if defined(PETSC_DEBUG) 577 PetscTruth found = PETSC_FALSE; 578 #endif 579 580 PetscFunctionBegin; 581 /* first count number of contributors to each processor */ 582 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 583 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 584 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 585 j = 0; 586 for (i=0; i<N; i++) { 587 if (lastidx > (idx = rows[i])) j = 0; 588 lastidx = idx; 589 for (; j<size; j++) { 590 if (idx >= owners[j] && idx < owners[j+1]) { 591 nprocs[2*j]++; 592 nprocs[2*j+1] = 1; 593 owner[i] = j; 594 #if defined(PETSC_DEBUG) 595 found = PETSC_TRUE; 596 #endif 597 break; 598 } 599 } 600 #if defined(PETSC_DEBUG) 601 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 602 found = PETSC_FALSE; 603 #endif 604 } 605 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 606 607 /* inform other processors of number of messages and max length*/ 608 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 609 610 /* post receives: */ 611 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 612 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 613 for (i=0; i<nrecvs; i++) { 614 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 615 } 616 617 /* do sends: 618 1) starts[i] gives the starting index in svalues for stuff going to 619 the ith processor 620 */ 621 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 622 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 623 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 624 starts[0] = 0; 625 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 626 for (i=0; i<N; i++) { 627 svalues[starts[owner[i]]++] = rows[i]; 628 } 629 630 starts[0] = 0; 631 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 632 count = 0; 633 for (i=0; i<size; i++) { 634 if (nprocs[2*i+1]) { 635 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 636 } 637 } 638 ierr = PetscFree(starts);CHKERRQ(ierr); 639 640 base = owners[rank]; 641 642 /* wait on receives */ 643 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 644 count = nrecvs; slen = 0; 645 while (count) { 646 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 647 /* unpack receives into our local space */ 648 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 649 source[imdex] = recv_status.MPI_SOURCE; 650 lens[imdex] = n; 651 slen += n; 652 count--; 653 } 654 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 655 656 /* move the data into the send scatter */ 657 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 658 count = 0; 659 for (i=0; i<nrecvs; i++) { 660 values = rvalues + i*nmax; 661 for (j=0; j<lens[i]; j++) { 662 lrows[count++] = values[j] - base; 663 } 664 } 665 ierr = PetscFree(rvalues);CHKERRQ(ierr); 666 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 667 ierr = PetscFree(owner);CHKERRQ(ierr); 668 ierr = PetscFree(nprocs);CHKERRQ(ierr); 669 670 /* actually zap the local rows */ 671 /* 672 Zero the required rows. If the "diagonal block" of the matrix 673 is square and the user wishes to set the diagonal we use separate 674 code so that MatSetValues() is not called for each diagonal allocating 675 new memory, thus calling lots of mallocs and slowing things down. 676 677 */ 678 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 679 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 680 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 681 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 682 } else if (diag != 0.0) { 683 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 684 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 685 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 687 } 688 for (i = 0; i < slen; i++) { 689 row = lrows[i] + rstart; 690 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 694 } else { 695 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 696 } 697 ierr = PetscFree(lrows);CHKERRQ(ierr); 698 699 /* wait on sends */ 700 if (nsends) { 701 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 702 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 703 ierr = PetscFree(send_status);CHKERRQ(ierr); 704 } 705 ierr = PetscFree(send_waits);CHKERRQ(ierr); 706 ierr = PetscFree(svalues);CHKERRQ(ierr); 707 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMult_MPIAIJ" 713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscInt nt; 718 719 PetscFunctionBegin; 720 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 721 if (nt != A->cmap->n) { 722 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 723 } 724 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 726 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 733 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 734 { 735 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatMultAdd_MPIAIJ" 745 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 746 { 747 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 748 PetscErrorCode ierr; 749 750 PetscFunctionBegin; 751 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 752 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 753 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 754 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 760 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 761 { 762 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 763 PetscErrorCode ierr; 764 PetscTruth merged; 765 766 PetscFunctionBegin; 767 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 768 /* do nondiagonal part */ 769 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 770 if (!merged) { 771 /* send it on its way */ 772 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 773 /* do local part */ 774 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 775 /* receive remote parts: note this assumes the values are not actually */ 776 /* added in yy until the next line, */ 777 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 778 } else { 779 /* do local part */ 780 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 781 /* send it on its way */ 782 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 783 /* values actually were received in the Begin() but we need to call this nop */ 784 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 785 } 786 PetscFunctionReturn(0); 787 } 788 789 EXTERN_C_BEGIN 790 #undef __FUNCT__ 791 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 792 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 793 { 794 MPI_Comm comm; 795 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 796 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 797 IS Me,Notme; 798 PetscErrorCode ierr; 799 PetscInt M,N,first,last,*notme,i; 800 PetscMPIInt size; 801 802 PetscFunctionBegin; 803 804 /* Easy test: symmetric diagonal block */ 805 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 806 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 807 if (!*f) PetscFunctionReturn(0); 808 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 809 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 810 if (size == 1) PetscFunctionReturn(0); 811 812 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 813 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 814 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 815 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 816 for (i=0; i<first; i++) notme[i] = i; 817 for (i=last; i<M; i++) notme[i-last+first] = i; 818 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 819 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 820 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 821 Aoff = Aoffs[0]; 822 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 823 Boff = Boffs[0]; 824 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 825 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 826 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 827 ierr = ISDestroy(Me);CHKERRQ(ierr); 828 ierr = ISDestroy(Notme);CHKERRQ(ierr); 829 830 PetscFunctionReturn(0); 831 } 832 EXTERN_C_END 833 834 #undef __FUNCT__ 835 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 836 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 837 { 838 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 839 PetscErrorCode ierr; 840 841 PetscFunctionBegin; 842 /* do nondiagonal part */ 843 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 844 /* send it on its way */ 845 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 846 /* do local part */ 847 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 848 /* receive remote parts */ 849 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 850 PetscFunctionReturn(0); 851 } 852 853 /* 854 This only works correctly for square matrices where the subblock A->A is the 855 diagonal block 856 */ 857 #undef __FUNCT__ 858 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 859 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 860 { 861 PetscErrorCode ierr; 862 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 863 864 PetscFunctionBegin; 865 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 866 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) { 867 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 868 } 869 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "MatScale_MPIAIJ" 875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 876 { 877 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 878 PetscErrorCode ierr; 879 880 PetscFunctionBegin; 881 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 882 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatDestroy_MPIAIJ" 888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 889 { 890 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 891 PetscErrorCode ierr; 892 893 PetscFunctionBegin; 894 #if defined(PETSC_USE_LOG) 895 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 896 #endif 897 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 898 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 899 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 900 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 901 #if defined (PETSC_USE_CTABLE) 902 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 903 #else 904 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 905 #endif 906 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 907 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 908 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 909 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 910 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 911 ierr = PetscFree(aij);CHKERRQ(ierr); 912 913 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 914 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 915 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 916 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 917 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 918 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 919 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 920 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 921 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 922 PetscFunctionReturn(0); 923 } 924 925 #undef __FUNCT__ 926 #define __FUNCT__ "MatView_MPIAIJ_Binary" 927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 928 { 929 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 930 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 931 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 932 PetscErrorCode ierr; 933 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 934 int fd; 935 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 936 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 937 PetscScalar *column_values; 938 939 PetscFunctionBegin; 940 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 941 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 942 nz = A->nz + B->nz; 943 if (!rank) { 944 header[0] = MAT_FILE_COOKIE; 945 header[1] = mat->rmap->N; 946 header[2] = mat->cmap->N; 947 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 948 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 949 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 950 /* get largest number of rows any processor has */ 951 rlen = mat->rmap->n; 952 range = mat->rmap->range; 953 for (i=1; i<size; i++) { 954 rlen = PetscMax(rlen,range[i+1] - range[i]); 955 } 956 } else { 957 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 958 rlen = mat->rmap->n; 959 } 960 961 /* load up the local row counts */ 962 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 963 for (i=0; i<mat->rmap->n; i++) { 964 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 965 } 966 967 /* store the row lengths to the file */ 968 if (!rank) { 969 MPI_Status status; 970 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 971 for (i=1; i<size; i++) { 972 rlen = range[i+1] - range[i]; 973 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 974 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 975 } 976 } else { 977 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 978 } 979 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 980 981 /* load up the local column indices */ 982 nzmax = nz; /* )th processor needs space a largest processor needs */ 983 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 984 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 985 cnt = 0; 986 for (i=0; i<mat->rmap->n; i++) { 987 for (j=B->i[i]; j<B->i[i+1]; j++) { 988 if ( (col = garray[B->j[j]]) > cstart) break; 989 column_indices[cnt++] = col; 990 } 991 for (k=A->i[i]; k<A->i[i+1]; k++) { 992 column_indices[cnt++] = A->j[k] + cstart; 993 } 994 for (; j<B->i[i+1]; j++) { 995 column_indices[cnt++] = garray[B->j[j]]; 996 } 997 } 998 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 999 1000 /* store the column indices to the file */ 1001 if (!rank) { 1002 MPI_Status status; 1003 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1004 for (i=1; i<size; i++) { 1005 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1006 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1007 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1008 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1009 } 1010 } else { 1011 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1012 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1013 } 1014 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1015 1016 /* load up the local column values */ 1017 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1018 cnt = 0; 1019 for (i=0; i<mat->rmap->n; i++) { 1020 for (j=B->i[i]; j<B->i[i+1]; j++) { 1021 if ( garray[B->j[j]] > cstart) break; 1022 column_values[cnt++] = B->a[j]; 1023 } 1024 for (k=A->i[i]; k<A->i[i+1]; k++) { 1025 column_values[cnt++] = A->a[k]; 1026 } 1027 for (; j<B->i[i+1]; j++) { 1028 column_values[cnt++] = B->a[j]; 1029 } 1030 } 1031 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1032 1033 /* store the column values to the file */ 1034 if (!rank) { 1035 MPI_Status status; 1036 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1037 for (i=1; i<size; i++) { 1038 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1039 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1040 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1041 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1042 } 1043 } else { 1044 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1045 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1046 } 1047 ierr = PetscFree(column_values);CHKERRQ(ierr); 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1053 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1054 { 1055 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1056 PetscErrorCode ierr; 1057 PetscMPIInt rank = aij->rank,size = aij->size; 1058 PetscTruth isdraw,iascii,isbinary; 1059 PetscViewer sviewer; 1060 PetscViewerFormat format; 1061 1062 PetscFunctionBegin; 1063 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1064 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1065 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1066 if (iascii) { 1067 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1068 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1069 MatInfo info; 1070 PetscTruth inodes; 1071 1072 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1073 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1074 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1075 if (!inodes) { 1076 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1077 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1078 } else { 1079 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1080 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1081 } 1082 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1083 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1084 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1085 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1086 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1087 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1088 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1089 PetscFunctionReturn(0); 1090 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1091 PetscInt inodecount,inodelimit,*inodes; 1092 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1093 if (inodes) { 1094 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1095 } else { 1096 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1097 } 1098 PetscFunctionReturn(0); 1099 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1100 PetscFunctionReturn(0); 1101 } 1102 } else if (isbinary) { 1103 if (size == 1) { 1104 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1105 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1106 } else { 1107 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1108 } 1109 PetscFunctionReturn(0); 1110 } else if (isdraw) { 1111 PetscDraw draw; 1112 PetscTruth isnull; 1113 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1114 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1115 } 1116 1117 if (size == 1) { 1118 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1119 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1120 } else { 1121 /* assemble the entire matrix onto first processor. */ 1122 Mat A; 1123 Mat_SeqAIJ *Aloc; 1124 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1125 MatScalar *a; 1126 1127 if (mat->rmap->N > 1024) { 1128 PetscTruth flg = PETSC_FALSE; 1129 1130 ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1131 if (!flg) { 1132 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1133 } 1134 } 1135 1136 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1137 if (!rank) { 1138 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1139 } else { 1140 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1141 } 1142 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1143 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1144 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1145 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1146 1147 /* copy over the A part */ 1148 Aloc = (Mat_SeqAIJ*)aij->A->data; 1149 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1150 row = mat->rmap->rstart; 1151 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1152 for (i=0; i<m; i++) { 1153 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1154 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1155 } 1156 aj = Aloc->j; 1157 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1158 1159 /* copy over the B part */ 1160 Aloc = (Mat_SeqAIJ*)aij->B->data; 1161 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1162 row = mat->rmap->rstart; 1163 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1164 ct = cols; 1165 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1166 for (i=0; i<m; i++) { 1167 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1168 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1169 } 1170 ierr = PetscFree(ct);CHKERRQ(ierr); 1171 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1172 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1173 /* 1174 Everyone has to call to draw the matrix since the graphics waits are 1175 synchronized across all processors that share the PetscDraw object 1176 */ 1177 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1178 if (!rank) { 1179 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1180 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1181 } 1182 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1183 ierr = MatDestroy(A);CHKERRQ(ierr); 1184 } 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatView_MPIAIJ" 1190 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1191 { 1192 PetscErrorCode ierr; 1193 PetscTruth iascii,isdraw,issocket,isbinary; 1194 1195 PetscFunctionBegin; 1196 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1197 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1198 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1199 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1200 if (iascii || isdraw || isbinary || issocket) { 1201 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1202 } else { 1203 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1204 } 1205 PetscFunctionReturn(0); 1206 } 1207 1208 #undef __FUNCT__ 1209 #define __FUNCT__ "MatSOR_MPIAIJ" 1210 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1211 { 1212 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1213 PetscErrorCode ierr; 1214 Vec bb1 = 0; 1215 PetscTruth hasop; 1216 1217 PetscFunctionBegin; 1218 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1219 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1220 } 1221 1222 if (flag == SOR_APPLY_UPPER) { 1223 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1228 if (flag & SOR_ZERO_INITIAL_GUESS) { 1229 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1230 its--; 1231 } 1232 1233 while (its--) { 1234 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1235 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1236 1237 /* update rhs: bb1 = bb - B*x */ 1238 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1239 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1240 1241 /* local sweep */ 1242 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1243 } 1244 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1245 if (flag & SOR_ZERO_INITIAL_GUESS) { 1246 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1247 its--; 1248 } 1249 while (its--) { 1250 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1251 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1252 1253 /* update rhs: bb1 = bb - B*x */ 1254 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1255 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1256 1257 /* local sweep */ 1258 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1259 } 1260 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1261 if (flag & SOR_ZERO_INITIAL_GUESS) { 1262 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1263 its--; 1264 } 1265 while (its--) { 1266 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1267 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1268 1269 /* update rhs: bb1 = bb - B*x */ 1270 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1271 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1272 1273 /* local sweep */ 1274 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1275 } 1276 } else if (flag & SOR_EISENSTAT) { 1277 Vec xx1; 1278 1279 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1280 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1281 1282 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1283 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1284 if (!mat->diag) { 1285 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1286 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1287 } 1288 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1289 if (hasop) { 1290 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1291 } else { 1292 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1293 } 1294 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1295 1296 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1297 1298 /* local sweep */ 1299 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1300 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1301 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1302 } else { 1303 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1304 } 1305 1306 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatPermute_MPIAIJ" 1312 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1313 { 1314 MPI_Comm comm,pcomm; 1315 PetscInt first,local_size,nrows; 1316 const PetscInt *rows; 1317 PetscMPIInt size; 1318 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1319 PetscErrorCode ierr; 1320 1321 PetscFunctionBegin; 1322 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1323 /* make a collective version of 'rowp' */ 1324 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1325 if (pcomm==comm) { 1326 crowp = rowp; 1327 } else { 1328 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1329 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1330 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1331 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1332 } 1333 /* collect the global row permutation and invert it */ 1334 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1335 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1336 if (pcomm!=comm) { 1337 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1338 } 1339 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1340 /* get the local target indices */ 1341 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1342 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1343 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1344 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 1345 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1346 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1347 /* the column permutation is so much easier; 1348 make a local version of 'colp' and invert it */ 1349 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1350 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1351 if (size==1) { 1352 lcolp = colp; 1353 } else { 1354 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1355 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1356 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 1357 } 1358 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1359 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1360 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1361 if (size>1) { 1362 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1363 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1364 } 1365 /* now we just get the submatrix */ 1366 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1367 /* clean up */ 1368 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1369 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1370 PetscFunctionReturn(0); 1371 } 1372 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1375 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1376 { 1377 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1378 Mat A = mat->A,B = mat->B; 1379 PetscErrorCode ierr; 1380 PetscReal isend[5],irecv[5]; 1381 1382 PetscFunctionBegin; 1383 info->block_size = 1.0; 1384 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1385 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1386 isend[3] = info->memory; isend[4] = info->mallocs; 1387 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1388 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1389 isend[3] += info->memory; isend[4] += info->mallocs; 1390 if (flag == MAT_LOCAL) { 1391 info->nz_used = isend[0]; 1392 info->nz_allocated = isend[1]; 1393 info->nz_unneeded = isend[2]; 1394 info->memory = isend[3]; 1395 info->mallocs = isend[4]; 1396 } else if (flag == MAT_GLOBAL_MAX) { 1397 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1398 info->nz_used = irecv[0]; 1399 info->nz_allocated = irecv[1]; 1400 info->nz_unneeded = irecv[2]; 1401 info->memory = irecv[3]; 1402 info->mallocs = irecv[4]; 1403 } else if (flag == MAT_GLOBAL_SUM) { 1404 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1405 info->nz_used = irecv[0]; 1406 info->nz_allocated = irecv[1]; 1407 info->nz_unneeded = irecv[2]; 1408 info->memory = irecv[3]; 1409 info->mallocs = irecv[4]; 1410 } 1411 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1412 info->fill_ratio_needed = 0; 1413 info->factor_mallocs = 0; 1414 1415 PetscFunctionReturn(0); 1416 } 1417 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "MatSetOption_MPIAIJ" 1420 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg) 1421 { 1422 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1423 PetscErrorCode ierr; 1424 1425 PetscFunctionBegin; 1426 switch (op) { 1427 case MAT_NEW_NONZERO_LOCATIONS: 1428 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1429 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1430 case MAT_KEEP_NONZERO_PATTERN: 1431 case MAT_NEW_NONZERO_LOCATION_ERR: 1432 case MAT_USE_INODES: 1433 case MAT_IGNORE_ZERO_ENTRIES: 1434 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1435 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1436 break; 1437 case MAT_ROW_ORIENTED: 1438 a->roworiented = flg; 1439 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1440 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1441 break; 1442 case MAT_NEW_DIAGONALS: 1443 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1444 break; 1445 case MAT_IGNORE_OFF_PROC_ENTRIES: 1446 a->donotstash = PETSC_TRUE; 1447 break; 1448 case MAT_SYMMETRIC: 1449 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1450 break; 1451 case MAT_STRUCTURALLY_SYMMETRIC: 1452 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1453 break; 1454 case MAT_HERMITIAN: 1455 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1456 break; 1457 case MAT_SYMMETRY_ETERNAL: 1458 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1459 break; 1460 default: 1461 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1462 } 1463 PetscFunctionReturn(0); 1464 } 1465 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "MatGetRow_MPIAIJ" 1468 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1469 { 1470 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1471 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1472 PetscErrorCode ierr; 1473 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1474 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1475 PetscInt *cmap,*idx_p; 1476 1477 PetscFunctionBegin; 1478 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1479 mat->getrowactive = PETSC_TRUE; 1480 1481 if (!mat->rowvalues && (idx || v)) { 1482 /* 1483 allocate enough space to hold information from the longest row. 1484 */ 1485 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1486 PetscInt max = 1,tmp; 1487 for (i=0; i<matin->rmap->n; i++) { 1488 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1489 if (max < tmp) { max = tmp; } 1490 } 1491 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1492 } 1493 1494 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1495 lrow = row - rstart; 1496 1497 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1498 if (!v) {pvA = 0; pvB = 0;} 1499 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1500 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1501 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1502 nztot = nzA + nzB; 1503 1504 cmap = mat->garray; 1505 if (v || idx) { 1506 if (nztot) { 1507 /* Sort by increasing column numbers, assuming A and B already sorted */ 1508 PetscInt imark = -1; 1509 if (v) { 1510 *v = v_p = mat->rowvalues; 1511 for (i=0; i<nzB; i++) { 1512 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1513 else break; 1514 } 1515 imark = i; 1516 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1517 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1518 } 1519 if (idx) { 1520 *idx = idx_p = mat->rowindices; 1521 if (imark > -1) { 1522 for (i=0; i<imark; i++) { 1523 idx_p[i] = cmap[cworkB[i]]; 1524 } 1525 } else { 1526 for (i=0; i<nzB; i++) { 1527 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1528 else break; 1529 } 1530 imark = i; 1531 } 1532 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1533 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1534 } 1535 } else { 1536 if (idx) *idx = 0; 1537 if (v) *v = 0; 1538 } 1539 } 1540 *nz = nztot; 1541 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1542 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1543 PetscFunctionReturn(0); 1544 } 1545 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1548 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1549 { 1550 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1551 1552 PetscFunctionBegin; 1553 if (!aij->getrowactive) { 1554 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1555 } 1556 aij->getrowactive = PETSC_FALSE; 1557 PetscFunctionReturn(0); 1558 } 1559 1560 #undef __FUNCT__ 1561 #define __FUNCT__ "MatNorm_MPIAIJ" 1562 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1563 { 1564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1565 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1566 PetscErrorCode ierr; 1567 PetscInt i,j,cstart = mat->cmap->rstart; 1568 PetscReal sum = 0.0; 1569 MatScalar *v; 1570 1571 PetscFunctionBegin; 1572 if (aij->size == 1) { 1573 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1574 } else { 1575 if (type == NORM_FROBENIUS) { 1576 v = amat->a; 1577 for (i=0; i<amat->nz; i++) { 1578 #if defined(PETSC_USE_COMPLEX) 1579 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1580 #else 1581 sum += (*v)*(*v); v++; 1582 #endif 1583 } 1584 v = bmat->a; 1585 for (i=0; i<bmat->nz; i++) { 1586 #if defined(PETSC_USE_COMPLEX) 1587 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1588 #else 1589 sum += (*v)*(*v); v++; 1590 #endif 1591 } 1592 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1593 *norm = sqrt(*norm); 1594 } else if (type == NORM_1) { /* max column norm */ 1595 PetscReal *tmp,*tmp2; 1596 PetscInt *jj,*garray = aij->garray; 1597 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1598 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1599 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1600 *norm = 0.0; 1601 v = amat->a; jj = amat->j; 1602 for (j=0; j<amat->nz; j++) { 1603 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1604 } 1605 v = bmat->a; jj = bmat->j; 1606 for (j=0; j<bmat->nz; j++) { 1607 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1608 } 1609 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1610 for (j=0; j<mat->cmap->N; j++) { 1611 if (tmp2[j] > *norm) *norm = tmp2[j]; 1612 } 1613 ierr = PetscFree(tmp);CHKERRQ(ierr); 1614 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1615 } else if (type == NORM_INFINITY) { /* max row norm */ 1616 PetscReal ntemp = 0.0; 1617 for (j=0; j<aij->A->rmap->n; j++) { 1618 v = amat->a + amat->i[j]; 1619 sum = 0.0; 1620 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1621 sum += PetscAbsScalar(*v); v++; 1622 } 1623 v = bmat->a + bmat->i[j]; 1624 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1625 sum += PetscAbsScalar(*v); v++; 1626 } 1627 if (sum > ntemp) ntemp = sum; 1628 } 1629 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1630 } else { 1631 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1632 } 1633 } 1634 PetscFunctionReturn(0); 1635 } 1636 1637 #undef __FUNCT__ 1638 #define __FUNCT__ "MatTranspose_MPIAIJ" 1639 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1640 { 1641 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1642 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1643 PetscErrorCode ierr; 1644 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1645 PetscInt cstart=A->cmap->rstart,ncol; 1646 Mat B; 1647 MatScalar *array; 1648 1649 PetscFunctionBegin; 1650 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1651 1652 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1653 ai = Aloc->i; aj = Aloc->j; 1654 bi = Bloc->i; bj = Bloc->j; 1655 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1656 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1657 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1658 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1659 for (i=0; i<ai[ma]; i++){ 1660 d_nnz[aj[i]] ++; 1661 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1662 } 1663 1664 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1665 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1666 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1667 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1668 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1669 } else { 1670 B = *matout; 1671 } 1672 1673 /* copy over the A part */ 1674 array = Aloc->a; 1675 row = A->rmap->rstart; 1676 for (i=0; i<ma; i++) { 1677 ncol = ai[i+1]-ai[i]; 1678 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1679 row++; array += ncol; aj += ncol; 1680 } 1681 aj = Aloc->j; 1682 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1683 1684 /* copy over the B part */ 1685 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1686 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1687 array = Bloc->a; 1688 row = A->rmap->rstart; 1689 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1690 cols_tmp = cols; 1691 for (i=0; i<mb; i++) { 1692 ncol = bi[i+1]-bi[i]; 1693 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1694 row++; array += ncol; cols_tmp += ncol; 1695 } 1696 ierr = PetscFree(cols);CHKERRQ(ierr); 1697 1698 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1699 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1701 *matout = B; 1702 } else { 1703 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1704 } 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1710 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1711 { 1712 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1713 Mat a = aij->A,b = aij->B; 1714 PetscErrorCode ierr; 1715 PetscInt s1,s2,s3; 1716 1717 PetscFunctionBegin; 1718 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1719 if (rr) { 1720 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1721 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1722 /* Overlap communication with computation. */ 1723 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1724 } 1725 if (ll) { 1726 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1727 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1728 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1729 } 1730 /* scale the diagonal block */ 1731 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1732 1733 if (rr) { 1734 /* Do a scatter end and then right scale the off-diagonal block */ 1735 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1736 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1737 } 1738 1739 PetscFunctionReturn(0); 1740 } 1741 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1744 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1745 { 1746 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1747 PetscErrorCode ierr; 1748 1749 PetscFunctionBegin; 1750 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1751 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1752 PetscFunctionReturn(0); 1753 } 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1756 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1757 { 1758 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1759 PetscErrorCode ierr; 1760 1761 PetscFunctionBegin; 1762 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1763 PetscFunctionReturn(0); 1764 } 1765 1766 #undef __FUNCT__ 1767 #define __FUNCT__ "MatEqual_MPIAIJ" 1768 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1769 { 1770 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1771 Mat a,b,c,d; 1772 PetscTruth flg; 1773 PetscErrorCode ierr; 1774 1775 PetscFunctionBegin; 1776 a = matA->A; b = matA->B; 1777 c = matB->A; d = matB->B; 1778 1779 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1780 if (flg) { 1781 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1782 } 1783 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1784 PetscFunctionReturn(0); 1785 } 1786 1787 #undef __FUNCT__ 1788 #define __FUNCT__ "MatCopy_MPIAIJ" 1789 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1790 { 1791 PetscErrorCode ierr; 1792 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1793 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1794 1795 PetscFunctionBegin; 1796 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1797 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1798 /* because of the column compression in the off-processor part of the matrix a->B, 1799 the number of columns in a->B and b->B may be different, hence we cannot call 1800 the MatCopy() directly on the two parts. If need be, we can provide a more 1801 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1802 then copying the submatrices */ 1803 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1804 } else { 1805 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1806 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1807 } 1808 PetscFunctionReturn(0); 1809 } 1810 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1813 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1814 { 1815 PetscErrorCode ierr; 1816 1817 PetscFunctionBegin; 1818 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1819 PetscFunctionReturn(0); 1820 } 1821 1822 #include "petscblaslapack.h" 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatAXPY_MPIAIJ" 1825 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1826 { 1827 PetscErrorCode ierr; 1828 PetscInt i; 1829 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1830 PetscBLASInt bnz,one=1; 1831 Mat_SeqAIJ *x,*y; 1832 1833 PetscFunctionBegin; 1834 if (str == SAME_NONZERO_PATTERN) { 1835 PetscScalar alpha = a; 1836 x = (Mat_SeqAIJ *)xx->A->data; 1837 y = (Mat_SeqAIJ *)yy->A->data; 1838 bnz = PetscBLASIntCast(x->nz); 1839 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1840 x = (Mat_SeqAIJ *)xx->B->data; 1841 y = (Mat_SeqAIJ *)yy->B->data; 1842 bnz = PetscBLASIntCast(x->nz); 1843 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1844 } else if (str == SUBSET_NONZERO_PATTERN) { 1845 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1846 1847 x = (Mat_SeqAIJ *)xx->B->data; 1848 y = (Mat_SeqAIJ *)yy->B->data; 1849 if (y->xtoy && y->XtoY != xx->B) { 1850 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1851 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1852 } 1853 if (!y->xtoy) { /* get xtoy */ 1854 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1855 y->XtoY = xx->B; 1856 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1857 } 1858 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1859 } else { 1860 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1861 } 1862 PetscFunctionReturn(0); 1863 } 1864 1865 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1866 1867 #undef __FUNCT__ 1868 #define __FUNCT__ "MatConjugate_MPIAIJ" 1869 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1870 { 1871 #if defined(PETSC_USE_COMPLEX) 1872 PetscErrorCode ierr; 1873 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1874 1875 PetscFunctionBegin; 1876 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1877 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1878 #else 1879 PetscFunctionBegin; 1880 #endif 1881 PetscFunctionReturn(0); 1882 } 1883 1884 #undef __FUNCT__ 1885 #define __FUNCT__ "MatRealPart_MPIAIJ" 1886 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1887 { 1888 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1889 PetscErrorCode ierr; 1890 1891 PetscFunctionBegin; 1892 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1893 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1894 PetscFunctionReturn(0); 1895 } 1896 1897 #undef __FUNCT__ 1898 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1899 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1900 { 1901 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1902 PetscErrorCode ierr; 1903 1904 PetscFunctionBegin; 1905 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1906 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 #ifdef PETSC_HAVE_PBGL 1911 1912 #include <boost/parallel/mpi/bsp_process_group.hpp> 1913 #include <boost/graph/distributed/ilu_default_graph.hpp> 1914 #include <boost/graph/distributed/ilu_0_block.hpp> 1915 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1916 #include <boost/graph/distributed/petsc/interface.hpp> 1917 #include <boost/multi_array.hpp> 1918 #include <boost/parallel/distributed_property_map->hpp> 1919 1920 #undef __FUNCT__ 1921 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1922 /* 1923 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1924 */ 1925 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1926 { 1927 namespace petsc = boost::distributed::petsc; 1928 1929 namespace graph_dist = boost::graph::distributed; 1930 using boost::graph::distributed::ilu_default::process_group_type; 1931 using boost::graph::ilu_permuted; 1932 1933 PetscTruth row_identity, col_identity; 1934 PetscContainer c; 1935 PetscInt m, n, M, N; 1936 PetscErrorCode ierr; 1937 1938 PetscFunctionBegin; 1939 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1940 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1941 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1942 if (!row_identity || !col_identity) { 1943 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1944 } 1945 1946 process_group_type pg; 1947 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1948 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1949 lgraph_type& level_graph = *lgraph_p; 1950 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1951 1952 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1953 ilu_permuted(level_graph); 1954 1955 /* put together the new matrix */ 1956 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1957 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1958 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 1959 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 1960 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 1961 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1962 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1963 1964 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 1965 ierr = PetscContainerSetPointer(c, lgraph_p); 1966 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 1967 PetscFunctionReturn(0); 1968 } 1969 1970 #undef __FUNCT__ 1971 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 1972 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 1973 { 1974 PetscFunctionBegin; 1975 PetscFunctionReturn(0); 1976 } 1977 1978 #undef __FUNCT__ 1979 #define __FUNCT__ "MatSolve_MPIAIJ" 1980 /* 1981 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1982 */ 1983 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 1984 { 1985 namespace graph_dist = boost::graph::distributed; 1986 1987 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1988 lgraph_type* lgraph_p; 1989 PetscContainer c; 1990 PetscErrorCode ierr; 1991 1992 PetscFunctionBegin; 1993 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 1994 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 1995 ierr = VecCopy(b, x);CHKERRQ(ierr); 1996 1997 PetscScalar* array_x; 1998 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 1999 PetscInt sx; 2000 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2001 2002 PetscScalar* array_b; 2003 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2004 PetscInt sb; 2005 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2006 2007 lgraph_type& level_graph = *lgraph_p; 2008 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2009 2010 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2011 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2012 ref_x(array_x, boost::extents[num_vertices(graph)]); 2013 2014 typedef boost::iterator_property_map<array_ref_type::iterator, 2015 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2016 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2017 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2018 2019 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2020 2021 PetscFunctionReturn(0); 2022 } 2023 #endif 2024 2025 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2026 PetscInt nzlocal,nsends,nrecvs; 2027 PetscMPIInt *send_rank,*recv_rank; 2028 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2029 PetscScalar *sbuf_a,**rbuf_a; 2030 PetscErrorCode (*MatDestroy)(Mat); 2031 } Mat_Redundant; 2032 2033 #undef __FUNCT__ 2034 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2035 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2036 { 2037 PetscErrorCode ierr; 2038 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2039 PetscInt i; 2040 2041 PetscFunctionBegin; 2042 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2043 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2044 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2045 for (i=0; i<redund->nrecvs; i++){ 2046 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2047 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2048 } 2049 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2050 ierr = PetscFree(redund);CHKERRQ(ierr); 2051 PetscFunctionReturn(0); 2052 } 2053 2054 #undef __FUNCT__ 2055 #define __FUNCT__ "MatDestroy_MatRedundant" 2056 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2057 { 2058 PetscErrorCode ierr; 2059 PetscContainer container; 2060 Mat_Redundant *redund=PETSC_NULL; 2061 2062 PetscFunctionBegin; 2063 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2064 if (container) { 2065 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2066 } else { 2067 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2068 } 2069 A->ops->destroy = redund->MatDestroy; 2070 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2071 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2072 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2078 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2079 { 2080 PetscMPIInt rank,size; 2081 MPI_Comm comm=((PetscObject)mat)->comm; 2082 PetscErrorCode ierr; 2083 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2084 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2085 PetscInt *rowrange=mat->rmap->range; 2086 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2087 Mat A=aij->A,B=aij->B,C=*matredundant; 2088 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2089 PetscScalar *sbuf_a; 2090 PetscInt nzlocal=a->nz+b->nz; 2091 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2092 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2093 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2094 MatScalar *aworkA,*aworkB; 2095 PetscScalar *vals; 2096 PetscMPIInt tag1,tag2,tag3,imdex; 2097 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2098 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2099 MPI_Status recv_status,*send_status; 2100 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2101 PetscInt **rbuf_j=PETSC_NULL; 2102 PetscScalar **rbuf_a=PETSC_NULL; 2103 Mat_Redundant *redund=PETSC_NULL; 2104 PetscContainer container; 2105 2106 PetscFunctionBegin; 2107 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2108 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2109 2110 if (reuse == MAT_REUSE_MATRIX) { 2111 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2112 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2113 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2114 if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2115 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2116 if (container) { 2117 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2118 } else { 2119 SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); 2120 } 2121 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2122 2123 nsends = redund->nsends; 2124 nrecvs = redund->nrecvs; 2125 send_rank = redund->send_rank; 2126 recv_rank = redund->recv_rank; 2127 sbuf_nz = redund->sbuf_nz; 2128 rbuf_nz = redund->rbuf_nz; 2129 sbuf_j = redund->sbuf_j; 2130 sbuf_a = redund->sbuf_a; 2131 rbuf_j = redund->rbuf_j; 2132 rbuf_a = redund->rbuf_a; 2133 } 2134 2135 if (reuse == MAT_INITIAL_MATRIX){ 2136 PetscMPIInt subrank,subsize; 2137 PetscInt nleftover,np_subcomm; 2138 /* get the destination processors' id send_rank, nsends and nrecvs */ 2139 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2140 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2141 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2142 np_subcomm = size/nsubcomm; 2143 nleftover = size - nsubcomm*np_subcomm; 2144 nsends = 0; nrecvs = 0; 2145 for (i=0; i<size; i++){ /* i=rank*/ 2146 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2147 send_rank[nsends] = i; nsends++; 2148 recv_rank[nrecvs++] = i; 2149 } 2150 } 2151 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2152 i = size-nleftover-1; 2153 j = 0; 2154 while (j < nsubcomm - nleftover){ 2155 send_rank[nsends++] = i; 2156 i--; j++; 2157 } 2158 } 2159 2160 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2161 for (i=0; i<nleftover; i++){ 2162 recv_rank[nrecvs++] = size-nleftover+i; 2163 } 2164 } 2165 2166 /* allocate sbuf_j, sbuf_a */ 2167 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2168 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2169 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2170 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2171 2172 /* copy mat's local entries into the buffers */ 2173 if (reuse == MAT_INITIAL_MATRIX){ 2174 rownz_max = 0; 2175 rptr = sbuf_j; 2176 cols = sbuf_j + rend-rstart + 1; 2177 vals = sbuf_a; 2178 rptr[0] = 0; 2179 for (i=0; i<rend-rstart; i++){ 2180 row = i + rstart; 2181 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2182 ncols = nzA + nzB; 2183 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2184 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2185 /* load the column indices for this row into cols */ 2186 lwrite = 0; 2187 for (l=0; l<nzB; l++) { 2188 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2189 vals[lwrite] = aworkB[l]; 2190 cols[lwrite++] = ctmp; 2191 } 2192 } 2193 for (l=0; l<nzA; l++){ 2194 vals[lwrite] = aworkA[l]; 2195 cols[lwrite++] = cstart + cworkA[l]; 2196 } 2197 for (l=0; l<nzB; l++) { 2198 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2199 vals[lwrite] = aworkB[l]; 2200 cols[lwrite++] = ctmp; 2201 } 2202 } 2203 vals += ncols; 2204 cols += ncols; 2205 rptr[i+1] = rptr[i] + ncols; 2206 if (rownz_max < ncols) rownz_max = ncols; 2207 } 2208 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2209 } else { /* only copy matrix values into sbuf_a */ 2210 rptr = sbuf_j; 2211 vals = sbuf_a; 2212 rptr[0] = 0; 2213 for (i=0; i<rend-rstart; i++){ 2214 row = i + rstart; 2215 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2216 ncols = nzA + nzB; 2217 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2218 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2219 lwrite = 0; 2220 for (l=0; l<nzB; l++) { 2221 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2222 } 2223 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2224 for (l=0; l<nzB; l++) { 2225 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2226 } 2227 vals += ncols; 2228 rptr[i+1] = rptr[i] + ncols; 2229 } 2230 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2231 2232 /* send nzlocal to others, and recv other's nzlocal */ 2233 /*--------------------------------------------------*/ 2234 if (reuse == MAT_INITIAL_MATRIX){ 2235 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2236 s_waits2 = s_waits3 + nsends; 2237 s_waits1 = s_waits2 + nsends; 2238 r_waits1 = s_waits1 + nsends; 2239 r_waits2 = r_waits1 + nrecvs; 2240 r_waits3 = r_waits2 + nrecvs; 2241 } else { 2242 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2243 r_waits3 = s_waits3 + nsends; 2244 } 2245 2246 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2247 if (reuse == MAT_INITIAL_MATRIX){ 2248 /* get new tags to keep the communication clean */ 2249 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2250 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2251 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2252 2253 /* post receives of other's nzlocal */ 2254 for (i=0; i<nrecvs; i++){ 2255 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2256 } 2257 /* send nzlocal to others */ 2258 for (i=0; i<nsends; i++){ 2259 sbuf_nz[i] = nzlocal; 2260 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2261 } 2262 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2263 count = nrecvs; 2264 while (count) { 2265 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2266 recv_rank[imdex] = recv_status.MPI_SOURCE; 2267 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2268 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2269 2270 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2271 rbuf_nz[imdex] += i + 2; 2272 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2273 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2274 count--; 2275 } 2276 /* wait on sends of nzlocal */ 2277 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2278 /* send mat->i,j to others, and recv from other's */ 2279 /*------------------------------------------------*/ 2280 for (i=0; i<nsends; i++){ 2281 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2282 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2283 } 2284 /* wait on receives of mat->i,j */ 2285 /*------------------------------*/ 2286 count = nrecvs; 2287 while (count) { 2288 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2289 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2290 count--; 2291 } 2292 /* wait on sends of mat->i,j */ 2293 /*---------------------------*/ 2294 if (nsends) { 2295 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2296 } 2297 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2298 2299 /* post receives, send and receive mat->a */ 2300 /*----------------------------------------*/ 2301 for (imdex=0; imdex<nrecvs; imdex++) { 2302 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2303 } 2304 for (i=0; i<nsends; i++){ 2305 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2306 } 2307 count = nrecvs; 2308 while (count) { 2309 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2310 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2311 count--; 2312 } 2313 if (nsends) { 2314 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2315 } 2316 2317 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2318 2319 /* create redundant matrix */ 2320 /*-------------------------*/ 2321 if (reuse == MAT_INITIAL_MATRIX){ 2322 /* compute rownz_max for preallocation */ 2323 for (imdex=0; imdex<nrecvs; imdex++){ 2324 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2325 rptr = rbuf_j[imdex]; 2326 for (i=0; i<j; i++){ 2327 ncols = rptr[i+1] - rptr[i]; 2328 if (rownz_max < ncols) rownz_max = ncols; 2329 } 2330 } 2331 2332 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2333 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2334 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2335 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2336 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2337 } else { 2338 C = *matredundant; 2339 } 2340 2341 /* insert local matrix entries */ 2342 rptr = sbuf_j; 2343 cols = sbuf_j + rend-rstart + 1; 2344 vals = sbuf_a; 2345 for (i=0; i<rend-rstart; i++){ 2346 row = i + rstart; 2347 ncols = rptr[i+1] - rptr[i]; 2348 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2349 vals += ncols; 2350 cols += ncols; 2351 } 2352 /* insert received matrix entries */ 2353 for (imdex=0; imdex<nrecvs; imdex++){ 2354 rstart = rowrange[recv_rank[imdex]]; 2355 rend = rowrange[recv_rank[imdex]+1]; 2356 rptr = rbuf_j[imdex]; 2357 cols = rbuf_j[imdex] + rend-rstart + 1; 2358 vals = rbuf_a[imdex]; 2359 for (i=0; i<rend-rstart; i++){ 2360 row = i + rstart; 2361 ncols = rptr[i+1] - rptr[i]; 2362 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2363 vals += ncols; 2364 cols += ncols; 2365 } 2366 } 2367 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2368 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2369 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2370 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2371 if (reuse == MAT_INITIAL_MATRIX){ 2372 PetscContainer container; 2373 *matredundant = C; 2374 /* create a supporting struct and attach it to C for reuse */ 2375 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2376 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2377 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2378 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2379 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2380 2381 redund->nzlocal = nzlocal; 2382 redund->nsends = nsends; 2383 redund->nrecvs = nrecvs; 2384 redund->send_rank = send_rank; 2385 redund->recv_rank = recv_rank; 2386 redund->sbuf_nz = sbuf_nz; 2387 redund->rbuf_nz = rbuf_nz; 2388 redund->sbuf_j = sbuf_j; 2389 redund->sbuf_a = sbuf_a; 2390 redund->rbuf_j = rbuf_j; 2391 redund->rbuf_a = rbuf_a; 2392 2393 redund->MatDestroy = C->ops->destroy; 2394 C->ops->destroy = MatDestroy_MatRedundant; 2395 } 2396 PetscFunctionReturn(0); 2397 } 2398 2399 #undef __FUNCT__ 2400 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2401 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2402 { 2403 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2404 PetscErrorCode ierr; 2405 PetscInt i,*idxb = 0; 2406 PetscScalar *va,*vb; 2407 Vec vtmp; 2408 2409 PetscFunctionBegin; 2410 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2411 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2412 if (idx) { 2413 for (i=0; i<A->rmap->n; i++) { 2414 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2415 } 2416 } 2417 2418 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2419 if (idx) { 2420 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2421 } 2422 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2423 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2424 2425 for (i=0; i<A->rmap->n; i++){ 2426 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2427 va[i] = vb[i]; 2428 if (idx) idx[i] = a->garray[idxb[i]]; 2429 } 2430 } 2431 2432 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2433 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2434 if (idxb) { 2435 ierr = PetscFree(idxb);CHKERRQ(ierr); 2436 } 2437 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2438 PetscFunctionReturn(0); 2439 } 2440 2441 #undef __FUNCT__ 2442 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2443 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2444 { 2445 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2446 PetscErrorCode ierr; 2447 PetscInt i,*idxb = 0; 2448 PetscScalar *va,*vb; 2449 Vec vtmp; 2450 2451 PetscFunctionBegin; 2452 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2453 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2454 if (idx) { 2455 for (i=0; i<A->cmap->n; i++) { 2456 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2457 } 2458 } 2459 2460 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2461 if (idx) { 2462 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2463 } 2464 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2465 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2466 2467 for (i=0; i<A->rmap->n; i++){ 2468 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2469 va[i] = vb[i]; 2470 if (idx) idx[i] = a->garray[idxb[i]]; 2471 } 2472 } 2473 2474 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2475 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2476 if (idxb) { 2477 ierr = PetscFree(idxb);CHKERRQ(ierr); 2478 } 2479 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2480 PetscFunctionReturn(0); 2481 } 2482 2483 #undef __FUNCT__ 2484 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2485 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2486 { 2487 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2488 PetscInt n = A->rmap->n; 2489 PetscInt cstart = A->cmap->rstart; 2490 PetscInt *cmap = mat->garray; 2491 PetscInt *diagIdx, *offdiagIdx; 2492 Vec diagV, offdiagV; 2493 PetscScalar *a, *diagA, *offdiagA; 2494 PetscInt r; 2495 PetscErrorCode ierr; 2496 2497 PetscFunctionBegin; 2498 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2499 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2500 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2501 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2502 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2503 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2504 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2505 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2506 for(r = 0; r < n; ++r) { 2507 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2508 a[r] = diagA[r]; 2509 idx[r] = cstart + diagIdx[r]; 2510 } else { 2511 a[r] = offdiagA[r]; 2512 idx[r] = cmap[offdiagIdx[r]]; 2513 } 2514 } 2515 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2516 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2517 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2518 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2519 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2520 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2521 PetscFunctionReturn(0); 2522 } 2523 2524 #undef __FUNCT__ 2525 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2526 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2527 { 2528 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2529 PetscInt n = A->rmap->n; 2530 PetscInt cstart = A->cmap->rstart; 2531 PetscInt *cmap = mat->garray; 2532 PetscInt *diagIdx, *offdiagIdx; 2533 Vec diagV, offdiagV; 2534 PetscScalar *a, *diagA, *offdiagA; 2535 PetscInt r; 2536 PetscErrorCode ierr; 2537 2538 PetscFunctionBegin; 2539 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2540 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2541 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2542 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2543 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2544 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2545 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2546 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2547 for(r = 0; r < n; ++r) { 2548 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2549 a[r] = diagA[r]; 2550 idx[r] = cstart + diagIdx[r]; 2551 } else { 2552 a[r] = offdiagA[r]; 2553 idx[r] = cmap[offdiagIdx[r]]; 2554 } 2555 } 2556 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2557 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2558 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2559 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2560 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2561 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2562 PetscFunctionReturn(0); 2563 } 2564 2565 #undef __FUNCT__ 2566 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2567 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2568 { 2569 PetscErrorCode ierr; 2570 Mat *dummy; 2571 2572 PetscFunctionBegin; 2573 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2574 *newmat = *dummy; 2575 ierr = PetscFree(dummy);CHKERRQ(ierr); 2576 PetscFunctionReturn(0); 2577 } 2578 2579 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2580 /* -------------------------------------------------------------------*/ 2581 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2582 MatGetRow_MPIAIJ, 2583 MatRestoreRow_MPIAIJ, 2584 MatMult_MPIAIJ, 2585 /* 4*/ MatMultAdd_MPIAIJ, 2586 MatMultTranspose_MPIAIJ, 2587 MatMultTransposeAdd_MPIAIJ, 2588 #ifdef PETSC_HAVE_PBGL 2589 MatSolve_MPIAIJ, 2590 #else 2591 0, 2592 #endif 2593 0, 2594 0, 2595 /*10*/ 0, 2596 0, 2597 0, 2598 MatSOR_MPIAIJ, 2599 MatTranspose_MPIAIJ, 2600 /*15*/ MatGetInfo_MPIAIJ, 2601 MatEqual_MPIAIJ, 2602 MatGetDiagonal_MPIAIJ, 2603 MatDiagonalScale_MPIAIJ, 2604 MatNorm_MPIAIJ, 2605 /*20*/ MatAssemblyBegin_MPIAIJ, 2606 MatAssemblyEnd_MPIAIJ, 2607 MatSetOption_MPIAIJ, 2608 MatZeroEntries_MPIAIJ, 2609 /*24*/ MatZeroRows_MPIAIJ, 2610 0, 2611 #ifdef PETSC_HAVE_PBGL 2612 0, 2613 #else 2614 0, 2615 #endif 2616 0, 2617 0, 2618 /*29*/ MatSetUpPreallocation_MPIAIJ, 2619 #ifdef PETSC_HAVE_PBGL 2620 0, 2621 #else 2622 0, 2623 #endif 2624 0, 2625 0, 2626 0, 2627 /*34*/ MatDuplicate_MPIAIJ, 2628 0, 2629 0, 2630 0, 2631 0, 2632 /*39*/ MatAXPY_MPIAIJ, 2633 MatGetSubMatrices_MPIAIJ, 2634 MatIncreaseOverlap_MPIAIJ, 2635 MatGetValues_MPIAIJ, 2636 MatCopy_MPIAIJ, 2637 /*44*/ MatGetRowMax_MPIAIJ, 2638 MatScale_MPIAIJ, 2639 0, 2640 0, 2641 0, 2642 /*49*/ MatSetBlockSize_MPIAIJ, 2643 0, 2644 0, 2645 0, 2646 0, 2647 /*54*/ MatFDColoringCreate_MPIAIJ, 2648 0, 2649 MatSetUnfactored_MPIAIJ, 2650 MatPermute_MPIAIJ, 2651 0, 2652 /*59*/ MatGetSubMatrix_MPIAIJ, 2653 MatDestroy_MPIAIJ, 2654 MatView_MPIAIJ, 2655 0, 2656 0, 2657 /*64*/ 0, 2658 0, 2659 0, 2660 0, 2661 0, 2662 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2663 MatGetRowMinAbs_MPIAIJ, 2664 0, 2665 MatSetColoring_MPIAIJ, 2666 #if defined(PETSC_HAVE_ADIC) 2667 MatSetValuesAdic_MPIAIJ, 2668 #else 2669 0, 2670 #endif 2671 MatSetValuesAdifor_MPIAIJ, 2672 /*75*/ MatFDColoringApply_AIJ, 2673 0, 2674 0, 2675 0, 2676 0, 2677 /*80*/ 0, 2678 0, 2679 0, 2680 /*83*/ MatLoad_MPIAIJ, 2681 0, 2682 0, 2683 0, 2684 0, 2685 0, 2686 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2687 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2688 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2689 MatPtAP_Basic, 2690 MatPtAPSymbolic_MPIAIJ, 2691 /*94*/ MatPtAPNumeric_MPIAIJ, 2692 0, 2693 0, 2694 0, 2695 0, 2696 /*99*/ 0, 2697 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2698 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2699 MatConjugate_MPIAIJ, 2700 0, 2701 /*104*/MatSetValuesRow_MPIAIJ, 2702 MatRealPart_MPIAIJ, 2703 MatImaginaryPart_MPIAIJ, 2704 0, 2705 0, 2706 /*109*/0, 2707 MatGetRedundantMatrix_MPIAIJ, 2708 MatGetRowMin_MPIAIJ, 2709 0, 2710 0, 2711 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2712 0, 2713 0, 2714 0, 2715 0, 2716 0 2717 }; 2718 2719 /* ----------------------------------------------------------------------------------------*/ 2720 2721 EXTERN_C_BEGIN 2722 #undef __FUNCT__ 2723 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2724 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2725 { 2726 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2727 PetscErrorCode ierr; 2728 2729 PetscFunctionBegin; 2730 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2731 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2732 PetscFunctionReturn(0); 2733 } 2734 EXTERN_C_END 2735 2736 EXTERN_C_BEGIN 2737 #undef __FUNCT__ 2738 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2739 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2740 { 2741 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2742 PetscErrorCode ierr; 2743 2744 PetscFunctionBegin; 2745 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2746 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2747 PetscFunctionReturn(0); 2748 } 2749 EXTERN_C_END 2750 2751 #include "petscpc.h" 2752 EXTERN_C_BEGIN 2753 #undef __FUNCT__ 2754 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2755 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2756 { 2757 Mat_MPIAIJ *b; 2758 PetscErrorCode ierr; 2759 PetscInt i; 2760 2761 PetscFunctionBegin; 2762 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2763 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2764 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2765 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2766 2767 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2768 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2769 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2770 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2771 if (d_nnz) { 2772 for (i=0; i<B->rmap->n; i++) { 2773 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2774 } 2775 } 2776 if (o_nnz) { 2777 for (i=0; i<B->rmap->n; i++) { 2778 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2779 } 2780 } 2781 b = (Mat_MPIAIJ*)B->data; 2782 2783 if (!B->preallocated) { 2784 /* Explicitly create 2 MATSEQAIJ matrices. */ 2785 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2786 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2787 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2788 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2789 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2790 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2791 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2792 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2793 } 2794 2795 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2796 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2797 B->preallocated = PETSC_TRUE; 2798 PetscFunctionReturn(0); 2799 } 2800 EXTERN_C_END 2801 2802 #undef __FUNCT__ 2803 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2804 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2805 { 2806 Mat mat; 2807 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2808 PetscErrorCode ierr; 2809 2810 PetscFunctionBegin; 2811 *newmat = 0; 2812 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2813 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2814 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2815 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2816 a = (Mat_MPIAIJ*)mat->data; 2817 2818 mat->factor = matin->factor; 2819 mat->rmap->bs = matin->rmap->bs; 2820 mat->assembled = PETSC_TRUE; 2821 mat->insertmode = NOT_SET_VALUES; 2822 mat->preallocated = PETSC_TRUE; 2823 2824 a->size = oldmat->size; 2825 a->rank = oldmat->rank; 2826 a->donotstash = oldmat->donotstash; 2827 a->roworiented = oldmat->roworiented; 2828 a->rowindices = 0; 2829 a->rowvalues = 0; 2830 a->getrowactive = PETSC_FALSE; 2831 2832 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2833 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2834 2835 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2836 if (oldmat->colmap) { 2837 #if defined (PETSC_USE_CTABLE) 2838 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2839 #else 2840 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2841 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2842 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2843 #endif 2844 } else a->colmap = 0; 2845 if (oldmat->garray) { 2846 PetscInt len; 2847 len = oldmat->B->cmap->n; 2848 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2849 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2850 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2851 } else a->garray = 0; 2852 2853 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2854 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2855 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2856 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2857 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2858 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2859 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2860 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2861 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2862 *newmat = mat; 2863 PetscFunctionReturn(0); 2864 } 2865 2866 #include "petscsys.h" 2867 2868 #undef __FUNCT__ 2869 #define __FUNCT__ "MatLoad_MPIAIJ" 2870 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2871 { 2872 Mat A; 2873 PetscScalar *vals,*svals; 2874 MPI_Comm comm = ((PetscObject)viewer)->comm; 2875 MPI_Status status; 2876 PetscErrorCode ierr; 2877 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz; 2878 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2879 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2880 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2881 PetscInt cend,cstart,n,*rowners; 2882 int fd; 2883 2884 PetscFunctionBegin; 2885 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2886 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2887 if (!rank) { 2888 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2889 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2890 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2891 } 2892 2893 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2894 M = header[1]; N = header[2]; 2895 /* determine ownership of all rows */ 2896 m = M/size + ((M % size) > rank); 2897 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2898 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2899 2900 /* First process needs enough room for process with most rows */ 2901 if (!rank) { 2902 mmax = rowners[1]; 2903 for (i=2; i<size; i++) { 2904 mmax = PetscMax(mmax,rowners[i]); 2905 } 2906 } else mmax = m; 2907 2908 rowners[0] = 0; 2909 for (i=2; i<=size; i++) { 2910 rowners[i] += rowners[i-1]; 2911 } 2912 rstart = rowners[rank]; 2913 rend = rowners[rank+1]; 2914 2915 /* distribute row lengths to all processors */ 2916 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2917 if (!rank) { 2918 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2919 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2920 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2921 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2922 for (j=0; j<m; j++) { 2923 procsnz[0] += ourlens[j]; 2924 } 2925 for (i=1; i<size; i++) { 2926 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2927 /* calculate the number of nonzeros on each processor */ 2928 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2929 procsnz[i] += rowlengths[j]; 2930 } 2931 mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]); 2932 ierr = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2933 } 2934 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2935 } else { 2936 mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr); 2937 ierr = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2938 } 2939 2940 if (!rank) { 2941 /* determine max buffer needed and allocate it */ 2942 maxnz = 0; 2943 for (i=0; i<size; i++) { 2944 maxnz = PetscMax(maxnz,procsnz[i]); 2945 } 2946 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2947 2948 /* read in my part of the matrix column indices */ 2949 nz = procsnz[0]; 2950 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2951 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2952 2953 /* read in every one elses and ship off */ 2954 for (i=1; i<size; i++) { 2955 nz = procsnz[i]; 2956 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2957 mpicnt = PetscMPIIntCast(nz); 2958 ierr = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2959 } 2960 ierr = PetscFree(cols);CHKERRQ(ierr); 2961 } else { 2962 /* determine buffer space needed for message */ 2963 nz = 0; 2964 for (i=0; i<m; i++) { 2965 nz += ourlens[i]; 2966 } 2967 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2968 2969 /* receive message of column indices*/ 2970 mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr); 2971 ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2972 ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr); 2973 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 2974 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 2975 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 2976 } 2977 2978 /* determine column ownership if matrix is not square */ 2979 if (N != M) { 2980 n = N/size + ((N % size) > rank); 2981 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2982 cstart = cend - n; 2983 } else { 2984 cstart = rstart; 2985 cend = rend; 2986 n = cend - cstart; 2987 } 2988 2989 /* loop over local rows, determining number of off diagonal entries */ 2990 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2991 jj = 0; 2992 for (i=0; i<m; i++) { 2993 for (j=0; j<ourlens[i]; j++) { 2994 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2995 jj++; 2996 } 2997 } 2998 2999 /* create our matrix */ 3000 for (i=0; i<m; i++) { 3001 ourlens[i] -= offlens[i]; 3002 } 3003 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 3004 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 3005 ierr = MatSetType(A,type);CHKERRQ(ierr); 3006 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 3007 3008 for (i=0; i<m; i++) { 3009 ourlens[i] += offlens[i]; 3010 } 3011 3012 if (!rank) { 3013 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3014 3015 /* read in my part of the matrix numerical values */ 3016 nz = procsnz[0]; 3017 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3018 3019 /* insert into matrix */ 3020 jj = rstart; 3021 smycols = mycols; 3022 svals = vals; 3023 for (i=0; i<m; i++) { 3024 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3025 smycols += ourlens[i]; 3026 svals += ourlens[i]; 3027 jj++; 3028 } 3029 3030 /* read in other processors and ship out */ 3031 for (i=1; i<size; i++) { 3032 nz = procsnz[i]; 3033 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3034 mpicnt = PetscMPIIntCast(nz); 3035 ierr = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3036 } 3037 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3038 } else { 3039 /* receive numeric values */ 3040 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3041 3042 /* receive message of values*/ 3043 mpicnt = PetscMPIIntCast(nz); 3044 ierr = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3045 ierr = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr); 3046 if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);} 3047 else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);} 3048 else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);} 3049 3050 /* insert into matrix */ 3051 jj = rstart; 3052 smycols = mycols; 3053 svals = vals; 3054 for (i=0; i<m; i++) { 3055 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3056 smycols += ourlens[i]; 3057 svals += ourlens[i]; 3058 jj++; 3059 } 3060 } 3061 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3062 ierr = PetscFree(vals);CHKERRQ(ierr); 3063 ierr = PetscFree(mycols);CHKERRQ(ierr); 3064 ierr = PetscFree(rowners);CHKERRQ(ierr); 3065 3066 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3067 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3068 *newmat = A; 3069 PetscFunctionReturn(0); 3070 } 3071 3072 #undef __FUNCT__ 3073 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3074 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3075 { 3076 PetscErrorCode ierr; 3077 IS iscol_local; 3078 PetscInt csize; 3079 3080 PetscFunctionBegin; 3081 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3082 if (call == MAT_REUSE_MATRIX) { 3083 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3084 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3085 } else { 3086 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3087 } 3088 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3089 if (call == MAT_INITIAL_MATRIX) { 3090 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3091 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3092 } 3093 PetscFunctionReturn(0); 3094 } 3095 3096 #undef __FUNCT__ 3097 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3098 /* 3099 Not great since it makes two copies of the submatrix, first an SeqAIJ 3100 in local and then by concatenating the local matrices the end result. 3101 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3102 3103 Note: This requires a sequential iscol with all indices. 3104 */ 3105 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3106 { 3107 PetscErrorCode ierr; 3108 PetscMPIInt rank,size; 3109 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3110 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3111 Mat *local,M,Mreuse; 3112 MatScalar *vwork,*aa; 3113 MPI_Comm comm = ((PetscObject)mat)->comm; 3114 Mat_SeqAIJ *aij; 3115 3116 3117 PetscFunctionBegin; 3118 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3119 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3120 3121 if (call == MAT_REUSE_MATRIX) { 3122 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3123 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3124 local = &Mreuse; 3125 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3126 } else { 3127 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3128 Mreuse = *local; 3129 ierr = PetscFree(local);CHKERRQ(ierr); 3130 } 3131 3132 /* 3133 m - number of local rows 3134 n - number of columns (same on all processors) 3135 rstart - first row in new global matrix generated 3136 */ 3137 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3138 if (call == MAT_INITIAL_MATRIX) { 3139 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3140 ii = aij->i; 3141 jj = aij->j; 3142 3143 /* 3144 Determine the number of non-zeros in the diagonal and off-diagonal 3145 portions of the matrix in order to do correct preallocation 3146 */ 3147 3148 /* first get start and end of "diagonal" columns */ 3149 if (csize == PETSC_DECIDE) { 3150 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3151 if (mglobal == n) { /* square matrix */ 3152 nlocal = m; 3153 } else { 3154 nlocal = n/size + ((n % size) > rank); 3155 } 3156 } else { 3157 nlocal = csize; 3158 } 3159 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3160 rstart = rend - nlocal; 3161 if (rank == size - 1 && rend != n) { 3162 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3163 } 3164 3165 /* next, compute all the lengths */ 3166 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3167 olens = dlens + m; 3168 for (i=0; i<m; i++) { 3169 jend = ii[i+1] - ii[i]; 3170 olen = 0; 3171 dlen = 0; 3172 for (j=0; j<jend; j++) { 3173 if (*jj < rstart || *jj >= rend) olen++; 3174 else dlen++; 3175 jj++; 3176 } 3177 olens[i] = olen; 3178 dlens[i] = dlen; 3179 } 3180 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3181 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3182 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3183 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3184 ierr = PetscFree(dlens);CHKERRQ(ierr); 3185 } else { 3186 PetscInt ml,nl; 3187 3188 M = *newmat; 3189 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3190 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3191 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3192 /* 3193 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3194 rather than the slower MatSetValues(). 3195 */ 3196 M->was_assembled = PETSC_TRUE; 3197 M->assembled = PETSC_FALSE; 3198 } 3199 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3200 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3201 ii = aij->i; 3202 jj = aij->j; 3203 aa = aij->a; 3204 for (i=0; i<m; i++) { 3205 row = rstart + i; 3206 nz = ii[i+1] - ii[i]; 3207 cwork = jj; jj += nz; 3208 vwork = aa; aa += nz; 3209 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3210 } 3211 3212 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3213 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3214 *newmat = M; 3215 3216 /* save submatrix used in processor for next request */ 3217 if (call == MAT_INITIAL_MATRIX) { 3218 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3219 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3220 } 3221 3222 PetscFunctionReturn(0); 3223 } 3224 3225 EXTERN_C_BEGIN 3226 #undef __FUNCT__ 3227 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3228 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3229 { 3230 PetscInt m,cstart, cend,j,nnz,i,d; 3231 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3232 const PetscInt *JJ; 3233 PetscScalar *values; 3234 PetscErrorCode ierr; 3235 3236 PetscFunctionBegin; 3237 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3238 3239 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3240 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3241 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3242 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3243 m = B->rmap->n; 3244 cstart = B->cmap->rstart; 3245 cend = B->cmap->rend; 3246 rstart = B->rmap->rstart; 3247 3248 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3249 3250 #if defined(PETSC_USE_DEBUGGING) 3251 for (i=0; i<m; i++) { 3252 nnz = Ii[i+1]- Ii[i]; 3253 JJ = J + Ii[i]; 3254 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3255 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3256 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3257 } 3258 #endif 3259 3260 for (i=0; i<m; i++) { 3261 nnz = Ii[i+1]- Ii[i]; 3262 JJ = J + Ii[i]; 3263 nnz_max = PetscMax(nnz_max,nnz); 3264 d = 0; 3265 for (j=0; j<nnz; j++) { 3266 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3267 } 3268 d_nnz[i] = d; 3269 o_nnz[i] = nnz - d; 3270 } 3271 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3272 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3273 3274 if (v) values = (PetscScalar*)v; 3275 else { 3276 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3277 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3278 } 3279 3280 for (i=0; i<m; i++) { 3281 ii = i + rstart; 3282 nnz = Ii[i+1]- Ii[i]; 3283 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3284 } 3285 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3286 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3287 3288 if (!v) { 3289 ierr = PetscFree(values);CHKERRQ(ierr); 3290 } 3291 PetscFunctionReturn(0); 3292 } 3293 EXTERN_C_END 3294 3295 #undef __FUNCT__ 3296 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3297 /*@ 3298 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3299 (the default parallel PETSc format). 3300 3301 Collective on MPI_Comm 3302 3303 Input Parameters: 3304 + B - the matrix 3305 . i - the indices into j for the start of each local row (starts with zero) 3306 . j - the column indices for each local row (starts with zero) 3307 - v - optional values in the matrix 3308 3309 Level: developer 3310 3311 Notes: 3312 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3313 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3314 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3315 3316 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3317 3318 The format which is used for the sparse matrix input, is equivalent to a 3319 row-major ordering.. i.e for the following matrix, the input data expected is 3320 as shown: 3321 3322 1 0 0 3323 2 0 3 P0 3324 ------- 3325 4 5 6 P1 3326 3327 Process0 [P0]: rows_owned=[0,1] 3328 i = {0,1,3} [size = nrow+1 = 2+1] 3329 j = {0,0,2} [size = nz = 6] 3330 v = {1,2,3} [size = nz = 6] 3331 3332 Process1 [P1]: rows_owned=[2] 3333 i = {0,3} [size = nrow+1 = 1+1] 3334 j = {0,1,2} [size = nz = 6] 3335 v = {4,5,6} [size = nz = 6] 3336 3337 .keywords: matrix, aij, compressed row, sparse, parallel 3338 3339 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3340 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3341 @*/ 3342 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3343 { 3344 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 3345 3346 PetscFunctionBegin; 3347 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3348 if (f) { 3349 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 3350 } 3351 PetscFunctionReturn(0); 3352 } 3353 3354 #undef __FUNCT__ 3355 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3356 /*@C 3357 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3358 (the default parallel PETSc format). For good matrix assembly performance 3359 the user should preallocate the matrix storage by setting the parameters 3360 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3361 performance can be increased by more than a factor of 50. 3362 3363 Collective on MPI_Comm 3364 3365 Input Parameters: 3366 + A - the matrix 3367 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3368 (same value is used for all local rows) 3369 . d_nnz - array containing the number of nonzeros in the various rows of the 3370 DIAGONAL portion of the local submatrix (possibly different for each row) 3371 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3372 The size of this array is equal to the number of local rows, i.e 'm'. 3373 You must leave room for the diagonal entry even if it is zero. 3374 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3375 submatrix (same value is used for all local rows). 3376 - o_nnz - array containing the number of nonzeros in the various rows of the 3377 OFF-DIAGONAL portion of the local submatrix (possibly different for 3378 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3379 structure. The size of this array is equal to the number 3380 of local rows, i.e 'm'. 3381 3382 If the *_nnz parameter is given then the *_nz parameter is ignored 3383 3384 The AIJ format (also called the Yale sparse matrix format or 3385 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3386 storage. The stored row and column indices begin with zero. See the users manual for details. 3387 3388 The parallel matrix is partitioned such that the first m0 rows belong to 3389 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3390 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3391 3392 The DIAGONAL portion of the local submatrix of a processor can be defined 3393 as the submatrix which is obtained by extraction the part corresponding 3394 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 3395 first row that belongs to the processor, and r2 is the last row belonging 3396 to the this processor. This is a square mxm matrix. The remaining portion 3397 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 3398 3399 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3400 3401 You can call MatGetInfo() to get information on how effective the preallocation was; 3402 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3403 You can also run with the option -info and look for messages with the string 3404 malloc in them to see if additional memory allocation was needed. 3405 3406 Example usage: 3407 3408 Consider the following 8x8 matrix with 34 non-zero values, that is 3409 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3410 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3411 as follows: 3412 3413 .vb 3414 1 2 0 | 0 3 0 | 0 4 3415 Proc0 0 5 6 | 7 0 0 | 8 0 3416 9 0 10 | 11 0 0 | 12 0 3417 ------------------------------------- 3418 13 0 14 | 15 16 17 | 0 0 3419 Proc1 0 18 0 | 19 20 21 | 0 0 3420 0 0 0 | 22 23 0 | 24 0 3421 ------------------------------------- 3422 Proc2 25 26 27 | 0 0 28 | 29 0 3423 30 0 0 | 31 32 33 | 0 34 3424 .ve 3425 3426 This can be represented as a collection of submatrices as: 3427 3428 .vb 3429 A B C 3430 D E F 3431 G H I 3432 .ve 3433 3434 Where the submatrices A,B,C are owned by proc0, D,E,F are 3435 owned by proc1, G,H,I are owned by proc2. 3436 3437 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3438 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3439 The 'M','N' parameters are 8,8, and have the same values on all procs. 3440 3441 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3442 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3443 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3444 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3445 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3446 matrix, ans [DF] as another SeqAIJ matrix. 3447 3448 When d_nz, o_nz parameters are specified, d_nz storage elements are 3449 allocated for every row of the local diagonal submatrix, and o_nz 3450 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3451 One way to choose d_nz and o_nz is to use the max nonzerors per local 3452 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3453 In this case, the values of d_nz,o_nz are: 3454 .vb 3455 proc0 : dnz = 2, o_nz = 2 3456 proc1 : dnz = 3, o_nz = 2 3457 proc2 : dnz = 1, o_nz = 4 3458 .ve 3459 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3460 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3461 for proc3. i.e we are using 12+15+10=37 storage locations to store 3462 34 values. 3463 3464 When d_nnz, o_nnz parameters are specified, the storage is specified 3465 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3466 In the above case the values for d_nnz,o_nnz are: 3467 .vb 3468 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3469 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3470 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3471 .ve 3472 Here the space allocated is sum of all the above values i.e 34, and 3473 hence pre-allocation is perfect. 3474 3475 Level: intermediate 3476 3477 .keywords: matrix, aij, compressed row, sparse, parallel 3478 3479 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3480 MPIAIJ, MatGetInfo() 3481 @*/ 3482 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3483 { 3484 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3485 3486 PetscFunctionBegin; 3487 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3488 if (f) { 3489 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3490 } 3491 PetscFunctionReturn(0); 3492 } 3493 3494 #undef __FUNCT__ 3495 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3496 /*@ 3497 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3498 CSR format the local rows. 3499 3500 Collective on MPI_Comm 3501 3502 Input Parameters: 3503 + comm - MPI communicator 3504 . m - number of local rows (Cannot be PETSC_DECIDE) 3505 . n - This value should be the same as the local size used in creating the 3506 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3507 calculated if N is given) For square matrices n is almost always m. 3508 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3509 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3510 . i - row indices 3511 . j - column indices 3512 - a - matrix values 3513 3514 Output Parameter: 3515 . mat - the matrix 3516 3517 Level: intermediate 3518 3519 Notes: 3520 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3521 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3522 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3523 3524 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3525 3526 The format which is used for the sparse matrix input, is equivalent to a 3527 row-major ordering.. i.e for the following matrix, the input data expected is 3528 as shown: 3529 3530 1 0 0 3531 2 0 3 P0 3532 ------- 3533 4 5 6 P1 3534 3535 Process0 [P0]: rows_owned=[0,1] 3536 i = {0,1,3} [size = nrow+1 = 2+1] 3537 j = {0,0,2} [size = nz = 6] 3538 v = {1,2,3} [size = nz = 6] 3539 3540 Process1 [P1]: rows_owned=[2] 3541 i = {0,3} [size = nrow+1 = 1+1] 3542 j = {0,1,2} [size = nz = 6] 3543 v = {4,5,6} [size = nz = 6] 3544 3545 .keywords: matrix, aij, compressed row, sparse, parallel 3546 3547 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3548 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3549 @*/ 3550 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3551 { 3552 PetscErrorCode ierr; 3553 3554 PetscFunctionBegin; 3555 if (i[0]) { 3556 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3557 } 3558 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3559 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3560 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3561 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3562 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3563 PetscFunctionReturn(0); 3564 } 3565 3566 #undef __FUNCT__ 3567 #define __FUNCT__ "MatCreateMPIAIJ" 3568 /*@C 3569 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3570 (the default parallel PETSc format). For good matrix assembly performance 3571 the user should preallocate the matrix storage by setting the parameters 3572 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3573 performance can be increased by more than a factor of 50. 3574 3575 Collective on MPI_Comm 3576 3577 Input Parameters: 3578 + comm - MPI communicator 3579 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3580 This value should be the same as the local size used in creating the 3581 y vector for the matrix-vector product y = Ax. 3582 . n - This value should be the same as the local size used in creating the 3583 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3584 calculated if N is given) For square matrices n is almost always m. 3585 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3586 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3587 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3588 (same value is used for all local rows) 3589 . d_nnz - array containing the number of nonzeros in the various rows of the 3590 DIAGONAL portion of the local submatrix (possibly different for each row) 3591 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3592 The size of this array is equal to the number of local rows, i.e 'm'. 3593 You must leave room for the diagonal entry even if it is zero. 3594 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3595 submatrix (same value is used for all local rows). 3596 - o_nnz - array containing the number of nonzeros in the various rows of the 3597 OFF-DIAGONAL portion of the local submatrix (possibly different for 3598 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3599 structure. The size of this array is equal to the number 3600 of local rows, i.e 'm'. 3601 3602 Output Parameter: 3603 . A - the matrix 3604 3605 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3606 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3607 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3608 3609 Notes: 3610 If the *_nnz parameter is given then the *_nz parameter is ignored 3611 3612 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3613 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3614 storage requirements for this matrix. 3615 3616 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3617 processor than it must be used on all processors that share the object for 3618 that argument. 3619 3620 The user MUST specify either the local or global matrix dimensions 3621 (possibly both). 3622 3623 The parallel matrix is partitioned across processors such that the 3624 first m0 rows belong to process 0, the next m1 rows belong to 3625 process 1, the next m2 rows belong to process 2 etc.. where 3626 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3627 values corresponding to [m x N] submatrix. 3628 3629 The columns are logically partitioned with the n0 columns belonging 3630 to 0th partition, the next n1 columns belonging to the next 3631 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3632 3633 The DIAGONAL portion of the local submatrix on any given processor 3634 is the submatrix corresponding to the rows and columns m,n 3635 corresponding to the given processor. i.e diagonal matrix on 3636 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3637 etc. The remaining portion of the local submatrix [m x (N-n)] 3638 constitute the OFF-DIAGONAL portion. The example below better 3639 illustrates this concept. 3640 3641 For a square global matrix we define each processor's diagonal portion 3642 to be its local rows and the corresponding columns (a square submatrix); 3643 each processor's off-diagonal portion encompasses the remainder of the 3644 local matrix (a rectangular submatrix). 3645 3646 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3647 3648 When calling this routine with a single process communicator, a matrix of 3649 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3650 type of communicator, use the construction mechanism: 3651 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3652 3653 By default, this format uses inodes (identical nodes) when possible. 3654 We search for consecutive rows with the same nonzero structure, thereby 3655 reusing matrix information to achieve increased efficiency. 3656 3657 Options Database Keys: 3658 + -mat_no_inode - Do not use inodes 3659 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3660 - -mat_aij_oneindex - Internally use indexing starting at 1 3661 rather than 0. Note that when calling MatSetValues(), 3662 the user still MUST index entries starting at 0! 3663 3664 3665 Example usage: 3666 3667 Consider the following 8x8 matrix with 34 non-zero values, that is 3668 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3669 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3670 as follows: 3671 3672 .vb 3673 1 2 0 | 0 3 0 | 0 4 3674 Proc0 0 5 6 | 7 0 0 | 8 0 3675 9 0 10 | 11 0 0 | 12 0 3676 ------------------------------------- 3677 13 0 14 | 15 16 17 | 0 0 3678 Proc1 0 18 0 | 19 20 21 | 0 0 3679 0 0 0 | 22 23 0 | 24 0 3680 ------------------------------------- 3681 Proc2 25 26 27 | 0 0 28 | 29 0 3682 30 0 0 | 31 32 33 | 0 34 3683 .ve 3684 3685 This can be represented as a collection of submatrices as: 3686 3687 .vb 3688 A B C 3689 D E F 3690 G H I 3691 .ve 3692 3693 Where the submatrices A,B,C are owned by proc0, D,E,F are 3694 owned by proc1, G,H,I are owned by proc2. 3695 3696 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3697 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3698 The 'M','N' parameters are 8,8, and have the same values on all procs. 3699 3700 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3701 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3702 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3703 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3704 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3705 matrix, ans [DF] as another SeqAIJ matrix. 3706 3707 When d_nz, o_nz parameters are specified, d_nz storage elements are 3708 allocated for every row of the local diagonal submatrix, and o_nz 3709 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3710 One way to choose d_nz and o_nz is to use the max nonzerors per local 3711 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3712 In this case, the values of d_nz,o_nz are: 3713 .vb 3714 proc0 : dnz = 2, o_nz = 2 3715 proc1 : dnz = 3, o_nz = 2 3716 proc2 : dnz = 1, o_nz = 4 3717 .ve 3718 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3719 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3720 for proc3. i.e we are using 12+15+10=37 storage locations to store 3721 34 values. 3722 3723 When d_nnz, o_nnz parameters are specified, the storage is specified 3724 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3725 In the above case the values for d_nnz,o_nnz are: 3726 .vb 3727 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3728 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3729 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3730 .ve 3731 Here the space allocated is sum of all the above values i.e 34, and 3732 hence pre-allocation is perfect. 3733 3734 Level: intermediate 3735 3736 .keywords: matrix, aij, compressed row, sparse, parallel 3737 3738 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3739 MPIAIJ, MatCreateMPIAIJWithArrays() 3740 @*/ 3741 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3742 { 3743 PetscErrorCode ierr; 3744 PetscMPIInt size; 3745 3746 PetscFunctionBegin; 3747 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3748 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3749 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3750 if (size > 1) { 3751 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3752 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3753 } else { 3754 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3755 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3756 } 3757 PetscFunctionReturn(0); 3758 } 3759 3760 #undef __FUNCT__ 3761 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3762 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3763 { 3764 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3765 3766 PetscFunctionBegin; 3767 *Ad = a->A; 3768 *Ao = a->B; 3769 *colmap = a->garray; 3770 PetscFunctionReturn(0); 3771 } 3772 3773 #undef __FUNCT__ 3774 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3775 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3776 { 3777 PetscErrorCode ierr; 3778 PetscInt i; 3779 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3780 3781 PetscFunctionBegin; 3782 if (coloring->ctype == IS_COLORING_GLOBAL) { 3783 ISColoringValue *allcolors,*colors; 3784 ISColoring ocoloring; 3785 3786 /* set coloring for diagonal portion */ 3787 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3788 3789 /* set coloring for off-diagonal portion */ 3790 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3791 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3792 for (i=0; i<a->B->cmap->n; i++) { 3793 colors[i] = allcolors[a->garray[i]]; 3794 } 3795 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3796 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3797 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3798 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3799 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3800 ISColoringValue *colors; 3801 PetscInt *larray; 3802 ISColoring ocoloring; 3803 3804 /* set coloring for diagonal portion */ 3805 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3806 for (i=0; i<a->A->cmap->n; i++) { 3807 larray[i] = i + A->cmap->rstart; 3808 } 3809 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3810 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3811 for (i=0; i<a->A->cmap->n; i++) { 3812 colors[i] = coloring->colors[larray[i]]; 3813 } 3814 ierr = PetscFree(larray);CHKERRQ(ierr); 3815 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3816 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3817 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3818 3819 /* set coloring for off-diagonal portion */ 3820 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3821 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3822 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3823 for (i=0; i<a->B->cmap->n; i++) { 3824 colors[i] = coloring->colors[larray[i]]; 3825 } 3826 ierr = PetscFree(larray);CHKERRQ(ierr); 3827 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3828 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3829 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3830 } else { 3831 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3832 } 3833 3834 PetscFunctionReturn(0); 3835 } 3836 3837 #if defined(PETSC_HAVE_ADIC) 3838 #undef __FUNCT__ 3839 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3840 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3841 { 3842 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3843 PetscErrorCode ierr; 3844 3845 PetscFunctionBegin; 3846 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3847 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3848 PetscFunctionReturn(0); 3849 } 3850 #endif 3851 3852 #undef __FUNCT__ 3853 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3854 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3855 { 3856 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3857 PetscErrorCode ierr; 3858 3859 PetscFunctionBegin; 3860 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3861 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3862 PetscFunctionReturn(0); 3863 } 3864 3865 #undef __FUNCT__ 3866 #define __FUNCT__ "MatMerge" 3867 /*@ 3868 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3869 matrices from each processor 3870 3871 Collective on MPI_Comm 3872 3873 Input Parameters: 3874 + comm - the communicators the parallel matrix will live on 3875 . inmat - the input sequential matrices 3876 . n - number of local columns (or PETSC_DECIDE) 3877 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3878 3879 Output Parameter: 3880 . outmat - the parallel matrix generated 3881 3882 Level: advanced 3883 3884 Notes: The number of columns of the matrix in EACH processor MUST be the same. 3885 3886 @*/ 3887 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3888 { 3889 PetscErrorCode ierr; 3890 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3891 PetscInt *indx; 3892 PetscScalar *values; 3893 3894 PetscFunctionBegin; 3895 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3896 if (scall == MAT_INITIAL_MATRIX){ 3897 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3898 if (n == PETSC_DECIDE){ 3899 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3900 } 3901 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3902 rstart -= m; 3903 3904 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3905 for (i=0;i<m;i++) { 3906 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3907 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3908 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3909 } 3910 /* This routine will ONLY return MPIAIJ type matrix */ 3911 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3912 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3913 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3914 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3915 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3916 3917 } else if (scall == MAT_REUSE_MATRIX){ 3918 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3919 } else { 3920 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3921 } 3922 3923 for (i=0;i<m;i++) { 3924 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3925 Ii = i + rstart; 3926 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3927 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3928 } 3929 ierr = MatDestroy(inmat);CHKERRQ(ierr); 3930 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3931 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3932 3933 PetscFunctionReturn(0); 3934 } 3935 3936 #undef __FUNCT__ 3937 #define __FUNCT__ "MatFileSplit" 3938 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3939 { 3940 PetscErrorCode ierr; 3941 PetscMPIInt rank; 3942 PetscInt m,N,i,rstart,nnz; 3943 size_t len; 3944 const PetscInt *indx; 3945 PetscViewer out; 3946 char *name; 3947 Mat B; 3948 const PetscScalar *values; 3949 3950 PetscFunctionBegin; 3951 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3952 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3953 /* Should this be the type of the diagonal block of A? */ 3954 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3955 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3956 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3957 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3958 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3959 for (i=0;i<m;i++) { 3960 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3961 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3962 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3963 } 3964 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3965 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3966 3967 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 3968 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3969 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 3970 sprintf(name,"%s.%d",outfile,rank); 3971 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3972 ierr = PetscFree(name); 3973 ierr = MatView(B,out);CHKERRQ(ierr); 3974 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 3975 ierr = MatDestroy(B);CHKERRQ(ierr); 3976 PetscFunctionReturn(0); 3977 } 3978 3979 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 3980 #undef __FUNCT__ 3981 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3982 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3983 { 3984 PetscErrorCode ierr; 3985 Mat_Merge_SeqsToMPI *merge; 3986 PetscContainer container; 3987 3988 PetscFunctionBegin; 3989 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3990 if (container) { 3991 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3992 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3993 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3994 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3995 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3996 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3997 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3998 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3999 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4000 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4001 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4002 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4003 4004 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4005 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4006 } 4007 ierr = PetscFree(merge);CHKERRQ(ierr); 4008 4009 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4010 PetscFunctionReturn(0); 4011 } 4012 4013 #include "../src/mat/utils/freespace.h" 4014 #include "petscbt.h" 4015 4016 #undef __FUNCT__ 4017 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4018 /*@C 4019 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4020 matrices from each processor 4021 4022 Collective on MPI_Comm 4023 4024 Input Parameters: 4025 + comm - the communicators the parallel matrix will live on 4026 . seqmat - the input sequential matrices 4027 . m - number of local rows (or PETSC_DECIDE) 4028 . n - number of local columns (or PETSC_DECIDE) 4029 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4030 4031 Output Parameter: 4032 . mpimat - the parallel matrix generated 4033 4034 Level: advanced 4035 4036 Notes: 4037 The dimensions of the sequential matrix in each processor MUST be the same. 4038 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4039 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4040 @*/ 4041 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4042 { 4043 PetscErrorCode ierr; 4044 MPI_Comm comm=((PetscObject)mpimat)->comm; 4045 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4046 PetscMPIInt size,rank,taga,*len_s; 4047 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4048 PetscInt proc,m; 4049 PetscInt **buf_ri,**buf_rj; 4050 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4051 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4052 MPI_Request *s_waits,*r_waits; 4053 MPI_Status *status; 4054 MatScalar *aa=a->a; 4055 MatScalar **abuf_r,*ba_i; 4056 Mat_Merge_SeqsToMPI *merge; 4057 PetscContainer container; 4058 4059 PetscFunctionBegin; 4060 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4061 4062 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4063 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4064 4065 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4066 if (container) { 4067 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4068 } 4069 bi = merge->bi; 4070 bj = merge->bj; 4071 buf_ri = merge->buf_ri; 4072 buf_rj = merge->buf_rj; 4073 4074 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4075 owners = merge->rowmap->range; 4076 len_s = merge->len_s; 4077 4078 /* send and recv matrix values */ 4079 /*-----------------------------*/ 4080 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4081 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4082 4083 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4084 for (proc=0,k=0; proc<size; proc++){ 4085 if (!len_s[proc]) continue; 4086 i = owners[proc]; 4087 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4088 k++; 4089 } 4090 4091 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4092 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4093 ierr = PetscFree(status);CHKERRQ(ierr); 4094 4095 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4096 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4097 4098 /* insert mat values of mpimat */ 4099 /*----------------------------*/ 4100 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4101 ierr = PetscMalloc3(merge->nrecv,PetscInt**,&buf_ri_k,merge->nrecv,PetscInt**,&nextrow,merge->nrecv,PetscInt**,&nextai);CHKERRQ(ierr); 4102 4103 for (k=0; k<merge->nrecv; k++){ 4104 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4105 nrows = *(buf_ri_k[k]); 4106 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4107 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4108 } 4109 4110 /* set values of ba */ 4111 m = merge->rowmap->n; 4112 for (i=0; i<m; i++) { 4113 arow = owners[rank] + i; 4114 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4115 bnzi = bi[i+1] - bi[i]; 4116 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4117 4118 /* add local non-zero vals of this proc's seqmat into ba */ 4119 anzi = ai[arow+1] - ai[arow]; 4120 aj = a->j + ai[arow]; 4121 aa = a->a + ai[arow]; 4122 nextaj = 0; 4123 for (j=0; nextaj<anzi; j++){ 4124 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4125 ba_i[j] += aa[nextaj++]; 4126 } 4127 } 4128 4129 /* add received vals into ba */ 4130 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4131 /* i-th row */ 4132 if (i == *nextrow[k]) { 4133 anzi = *(nextai[k]+1) - *nextai[k]; 4134 aj = buf_rj[k] + *(nextai[k]); 4135 aa = abuf_r[k] + *(nextai[k]); 4136 nextaj = 0; 4137 for (j=0; nextaj<anzi; j++){ 4138 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4139 ba_i[j] += aa[nextaj++]; 4140 } 4141 } 4142 nextrow[k]++; nextai[k]++; 4143 } 4144 } 4145 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4146 } 4147 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4148 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4149 4150 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4151 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4152 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4153 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4154 PetscFunctionReturn(0); 4155 } 4156 4157 #undef __FUNCT__ 4158 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4159 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4160 { 4161 PetscErrorCode ierr; 4162 Mat B_mpi; 4163 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4164 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4165 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4166 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4167 PetscInt len,proc,*dnz,*onz; 4168 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4169 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4170 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4171 MPI_Status *status; 4172 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4173 PetscBT lnkbt; 4174 Mat_Merge_SeqsToMPI *merge; 4175 PetscContainer container; 4176 4177 PetscFunctionBegin; 4178 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4179 4180 /* make sure it is a PETSc comm */ 4181 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4182 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4183 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4184 4185 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4186 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4187 4188 /* determine row ownership */ 4189 /*---------------------------------------------------------*/ 4190 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4191 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4192 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4193 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4194 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4195 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4196 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4197 4198 m = merge->rowmap->n; 4199 M = merge->rowmap->N; 4200 owners = merge->rowmap->range; 4201 4202 /* determine the number of messages to send, their lengths */ 4203 /*---------------------------------------------------------*/ 4204 len_s = merge->len_s; 4205 4206 len = 0; /* length of buf_si[] */ 4207 merge->nsend = 0; 4208 for (proc=0; proc<size; proc++){ 4209 len_si[proc] = 0; 4210 if (proc == rank){ 4211 len_s[proc] = 0; 4212 } else { 4213 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4214 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4215 } 4216 if (len_s[proc]) { 4217 merge->nsend++; 4218 nrows = 0; 4219 for (i=owners[proc]; i<owners[proc+1]; i++){ 4220 if (ai[i+1] > ai[i]) nrows++; 4221 } 4222 len_si[proc] = 2*(nrows+1); 4223 len += len_si[proc]; 4224 } 4225 } 4226 4227 /* determine the number and length of messages to receive for ij-structure */ 4228 /*-------------------------------------------------------------------------*/ 4229 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4230 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4231 4232 /* post the Irecv of j-structure */ 4233 /*-------------------------------*/ 4234 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4235 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4236 4237 /* post the Isend of j-structure */ 4238 /*--------------------------------*/ 4239 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4240 4241 for (proc=0, k=0; proc<size; proc++){ 4242 if (!len_s[proc]) continue; 4243 i = owners[proc]; 4244 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4245 k++; 4246 } 4247 4248 /* receives and sends of j-structure are complete */ 4249 /*------------------------------------------------*/ 4250 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4251 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4252 4253 /* send and recv i-structure */ 4254 /*---------------------------*/ 4255 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4256 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4257 4258 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4259 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4260 for (proc=0,k=0; proc<size; proc++){ 4261 if (!len_s[proc]) continue; 4262 /* form outgoing message for i-structure: 4263 buf_si[0]: nrows to be sent 4264 [1:nrows]: row index (global) 4265 [nrows+1:2*nrows+1]: i-structure index 4266 */ 4267 /*-------------------------------------------*/ 4268 nrows = len_si[proc]/2 - 1; 4269 buf_si_i = buf_si + nrows+1; 4270 buf_si[0] = nrows; 4271 buf_si_i[0] = 0; 4272 nrows = 0; 4273 for (i=owners[proc]; i<owners[proc+1]; i++){ 4274 anzi = ai[i+1] - ai[i]; 4275 if (anzi) { 4276 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4277 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4278 nrows++; 4279 } 4280 } 4281 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4282 k++; 4283 buf_si += len_si[proc]; 4284 } 4285 4286 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4287 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4288 4289 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4290 for (i=0; i<merge->nrecv; i++){ 4291 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4292 } 4293 4294 ierr = PetscFree(len_si);CHKERRQ(ierr); 4295 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4296 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4297 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4298 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4299 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4300 ierr = PetscFree(status);CHKERRQ(ierr); 4301 4302 /* compute a local seq matrix in each processor */ 4303 /*----------------------------------------------*/ 4304 /* allocate bi array and free space for accumulating nonzero column info */ 4305 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4306 bi[0] = 0; 4307 4308 /* create and initialize a linked list */ 4309 nlnk = N+1; 4310 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4311 4312 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4313 len = 0; 4314 len = ai[owners[rank+1]] - ai[owners[rank]]; 4315 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4316 current_space = free_space; 4317 4318 /* determine symbolic info for each local row */ 4319 ierr = PetscMalloc3(merge->nrecv,PetscInt**,&buf_ri_k,merge->nrecv,PetscInt**,&nextrow,merge->nrecv,PetscInt**,&nextai);CHKERRQ(ierr); 4320 4321 for (k=0; k<merge->nrecv; k++){ 4322 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4323 nrows = *buf_ri_k[k]; 4324 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4325 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4326 } 4327 4328 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4329 len = 0; 4330 for (i=0;i<m;i++) { 4331 bnzi = 0; 4332 /* add local non-zero cols of this proc's seqmat into lnk */ 4333 arow = owners[rank] + i; 4334 anzi = ai[arow+1] - ai[arow]; 4335 aj = a->j + ai[arow]; 4336 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4337 bnzi += nlnk; 4338 /* add received col data into lnk */ 4339 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4340 if (i == *nextrow[k]) { /* i-th row */ 4341 anzi = *(nextai[k]+1) - *nextai[k]; 4342 aj = buf_rj[k] + *nextai[k]; 4343 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4344 bnzi += nlnk; 4345 nextrow[k]++; nextai[k]++; 4346 } 4347 } 4348 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4349 4350 /* if free space is not available, make more free space */ 4351 if (current_space->local_remaining<bnzi) { 4352 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4353 nspacedouble++; 4354 } 4355 /* copy data into free space, then initialize lnk */ 4356 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4357 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4358 4359 current_space->array += bnzi; 4360 current_space->local_used += bnzi; 4361 current_space->local_remaining -= bnzi; 4362 4363 bi[i+1] = bi[i] + bnzi; 4364 } 4365 4366 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4367 4368 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4369 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4370 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4371 4372 /* create symbolic parallel matrix B_mpi */ 4373 /*---------------------------------------*/ 4374 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4375 if (n==PETSC_DECIDE) { 4376 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4377 } else { 4378 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4379 } 4380 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4381 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4382 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4383 4384 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4385 B_mpi->assembled = PETSC_FALSE; 4386 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4387 merge->bi = bi; 4388 merge->bj = bj; 4389 merge->buf_ri = buf_ri; 4390 merge->buf_rj = buf_rj; 4391 merge->coi = PETSC_NULL; 4392 merge->coj = PETSC_NULL; 4393 merge->owners_co = PETSC_NULL; 4394 4395 /* attach the supporting struct to B_mpi for reuse */ 4396 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4397 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4398 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4399 *mpimat = B_mpi; 4400 4401 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4402 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4403 PetscFunctionReturn(0); 4404 } 4405 4406 #undef __FUNCT__ 4407 #define __FUNCT__ "MatMerge_SeqsToMPI" 4408 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4409 { 4410 PetscErrorCode ierr; 4411 4412 PetscFunctionBegin; 4413 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4414 if (scall == MAT_INITIAL_MATRIX){ 4415 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4416 } 4417 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4418 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4419 PetscFunctionReturn(0); 4420 } 4421 4422 #undef __FUNCT__ 4423 #define __FUNCT__ "MatGetLocalMat" 4424 /*@ 4425 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4426 4427 Not Collective 4428 4429 Input Parameters: 4430 + A - the matrix 4431 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4432 4433 Output Parameter: 4434 . A_loc - the local sequential matrix generated 4435 4436 Level: developer 4437 4438 @*/ 4439 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4440 { 4441 PetscErrorCode ierr; 4442 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4443 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4444 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4445 MatScalar *aa=a->a,*ba=b->a,*cam; 4446 PetscScalar *ca; 4447 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4448 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4449 4450 PetscFunctionBegin; 4451 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4452 if (scall == MAT_INITIAL_MATRIX){ 4453 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4454 ci[0] = 0; 4455 for (i=0; i<am; i++){ 4456 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4457 } 4458 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4459 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4460 k = 0; 4461 for (i=0; i<am; i++) { 4462 ncols_o = bi[i+1] - bi[i]; 4463 ncols_d = ai[i+1] - ai[i]; 4464 /* off-diagonal portion of A */ 4465 for (jo=0; jo<ncols_o; jo++) { 4466 col = cmap[*bj]; 4467 if (col >= cstart) break; 4468 cj[k] = col; bj++; 4469 ca[k++] = *ba++; 4470 } 4471 /* diagonal portion of A */ 4472 for (j=0; j<ncols_d; j++) { 4473 cj[k] = cstart + *aj++; 4474 ca[k++] = *aa++; 4475 } 4476 /* off-diagonal portion of A */ 4477 for (j=jo; j<ncols_o; j++) { 4478 cj[k] = cmap[*bj++]; 4479 ca[k++] = *ba++; 4480 } 4481 } 4482 /* put together the new matrix */ 4483 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4484 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4485 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4486 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4487 mat->free_a = PETSC_TRUE; 4488 mat->free_ij = PETSC_TRUE; 4489 mat->nonew = 0; 4490 } else if (scall == MAT_REUSE_MATRIX){ 4491 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4492 ci = mat->i; cj = mat->j; cam = mat->a; 4493 for (i=0; i<am; i++) { 4494 /* off-diagonal portion of A */ 4495 ncols_o = bi[i+1] - bi[i]; 4496 for (jo=0; jo<ncols_o; jo++) { 4497 col = cmap[*bj]; 4498 if (col >= cstart) break; 4499 *cam++ = *ba++; bj++; 4500 } 4501 /* diagonal portion of A */ 4502 ncols_d = ai[i+1] - ai[i]; 4503 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4504 /* off-diagonal portion of A */ 4505 for (j=jo; j<ncols_o; j++) { 4506 *cam++ = *ba++; bj++; 4507 } 4508 } 4509 } else { 4510 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4511 } 4512 4513 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4514 PetscFunctionReturn(0); 4515 } 4516 4517 #undef __FUNCT__ 4518 #define __FUNCT__ "MatGetLocalMatCondensed" 4519 /*@C 4520 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4521 4522 Not Collective 4523 4524 Input Parameters: 4525 + A - the matrix 4526 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4527 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4528 4529 Output Parameter: 4530 . A_loc - the local sequential matrix generated 4531 4532 Level: developer 4533 4534 @*/ 4535 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4536 { 4537 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4538 PetscErrorCode ierr; 4539 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4540 IS isrowa,iscola; 4541 Mat *aloc; 4542 4543 PetscFunctionBegin; 4544 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4545 if (!row){ 4546 start = A->rmap->rstart; end = A->rmap->rend; 4547 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4548 } else { 4549 isrowa = *row; 4550 } 4551 if (!col){ 4552 start = A->cmap->rstart; 4553 cmap = a->garray; 4554 nzA = a->A->cmap->n; 4555 nzB = a->B->cmap->n; 4556 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4557 ncols = 0; 4558 for (i=0; i<nzB; i++) { 4559 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4560 else break; 4561 } 4562 imark = i; 4563 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4564 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4565 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 4566 ierr = PetscFree(idx);CHKERRQ(ierr); 4567 } else { 4568 iscola = *col; 4569 } 4570 if (scall != MAT_INITIAL_MATRIX){ 4571 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4572 aloc[0] = *A_loc; 4573 } 4574 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4575 *A_loc = aloc[0]; 4576 ierr = PetscFree(aloc);CHKERRQ(ierr); 4577 if (!row){ 4578 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4579 } 4580 if (!col){ 4581 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4582 } 4583 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4584 PetscFunctionReturn(0); 4585 } 4586 4587 #undef __FUNCT__ 4588 #define __FUNCT__ "MatGetBrowsOfAcols" 4589 /*@C 4590 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4591 4592 Collective on Mat 4593 4594 Input Parameters: 4595 + A,B - the matrices in mpiaij format 4596 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4597 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4598 4599 Output Parameter: 4600 + rowb, colb - index sets of rows and columns of B to extract 4601 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4602 - B_seq - the sequential matrix generated 4603 4604 Level: developer 4605 4606 @*/ 4607 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4608 { 4609 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4610 PetscErrorCode ierr; 4611 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4612 IS isrowb,iscolb; 4613 Mat *bseq; 4614 4615 PetscFunctionBegin; 4616 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4617 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4618 } 4619 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4620 4621 if (scall == MAT_INITIAL_MATRIX){ 4622 start = A->cmap->rstart; 4623 cmap = a->garray; 4624 nzA = a->A->cmap->n; 4625 nzB = a->B->cmap->n; 4626 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4627 ncols = 0; 4628 for (i=0; i<nzB; i++) { /* row < local row index */ 4629 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4630 else break; 4631 } 4632 imark = i; 4633 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4634 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4635 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 4636 ierr = PetscFree(idx);CHKERRQ(ierr); 4637 *brstart = imark; 4638 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4639 } else { 4640 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4641 isrowb = *rowb; iscolb = *colb; 4642 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4643 bseq[0] = *B_seq; 4644 } 4645 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4646 *B_seq = bseq[0]; 4647 ierr = PetscFree(bseq);CHKERRQ(ierr); 4648 if (!rowb){ 4649 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4650 } else { 4651 *rowb = isrowb; 4652 } 4653 if (!colb){ 4654 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4655 } else { 4656 *colb = iscolb; 4657 } 4658 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4659 PetscFunctionReturn(0); 4660 } 4661 4662 #undef __FUNCT__ 4663 #define __FUNCT__ "MatGetBrowsOfAoCols" 4664 /*@C 4665 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4666 of the OFF-DIAGONAL portion of local A 4667 4668 Collective on Mat 4669 4670 Input Parameters: 4671 + A,B - the matrices in mpiaij format 4672 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4673 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4674 . startsj_r - similar to startsj for receives 4675 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4676 4677 Output Parameter: 4678 + B_oth - the sequential matrix generated 4679 4680 Level: developer 4681 4682 @*/ 4683 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4684 { 4685 VecScatter_MPI_General *gen_to,*gen_from; 4686 PetscErrorCode ierr; 4687 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4688 Mat_SeqAIJ *b_oth; 4689 VecScatter ctx=a->Mvctx; 4690 MPI_Comm comm=((PetscObject)ctx)->comm; 4691 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4692 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4693 PetscScalar *rvalues,*svalues; 4694 MatScalar *b_otha,*bufa,*bufA; 4695 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4696 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4697 MPI_Status *sstatus,rstatus; 4698 PetscMPIInt jj; 4699 PetscInt *cols,sbs,rbs; 4700 PetscScalar *vals; 4701 4702 PetscFunctionBegin; 4703 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4704 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4705 } 4706 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4707 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4708 4709 gen_to = (VecScatter_MPI_General*)ctx->todata; 4710 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4711 rvalues = gen_from->values; /* holds the length of receiving row */ 4712 svalues = gen_to->values; /* holds the length of sending row */ 4713 nrecvs = gen_from->n; 4714 nsends = gen_to->n; 4715 4716 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4717 srow = gen_to->indices; /* local row index to be sent */ 4718 sstarts = gen_to->starts; 4719 sprocs = gen_to->procs; 4720 sstatus = gen_to->sstatus; 4721 sbs = gen_to->bs; 4722 rstarts = gen_from->starts; 4723 rprocs = gen_from->procs; 4724 rbs = gen_from->bs; 4725 4726 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4727 if (scall == MAT_INITIAL_MATRIX){ 4728 /* i-array */ 4729 /*---------*/ 4730 /* post receives */ 4731 for (i=0; i<nrecvs; i++){ 4732 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4733 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4734 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4735 } 4736 4737 /* pack the outgoing message */ 4738 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4739 sstartsj[0] = 0; rstartsj[0] = 0; 4740 len = 0; /* total length of j or a array to be sent */ 4741 k = 0; 4742 for (i=0; i<nsends; i++){ 4743 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4744 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4745 for (j=0; j<nrows; j++) { 4746 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4747 for (l=0; l<sbs; l++){ 4748 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4749 rowlen[j*sbs+l] = ncols; 4750 len += ncols; 4751 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4752 } 4753 k++; 4754 } 4755 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4756 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4757 } 4758 /* recvs and sends of i-array are completed */ 4759 i = nrecvs; 4760 while (i--) { 4761 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4762 } 4763 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4764 4765 /* allocate buffers for sending j and a arrays */ 4766 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4767 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4768 4769 /* create i-array of B_oth */ 4770 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4771 b_othi[0] = 0; 4772 len = 0; /* total length of j or a array to be received */ 4773 k = 0; 4774 for (i=0; i<nrecvs; i++){ 4775 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4776 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4777 for (j=0; j<nrows; j++) { 4778 b_othi[k+1] = b_othi[k] + rowlen[j]; 4779 len += rowlen[j]; k++; 4780 } 4781 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4782 } 4783 4784 /* allocate space for j and a arrrays of B_oth */ 4785 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4786 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4787 4788 /* j-array */ 4789 /*---------*/ 4790 /* post receives of j-array */ 4791 for (i=0; i<nrecvs; i++){ 4792 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4793 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4794 } 4795 4796 /* pack the outgoing message j-array */ 4797 k = 0; 4798 for (i=0; i<nsends; i++){ 4799 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4800 bufJ = bufj+sstartsj[i]; 4801 for (j=0; j<nrows; j++) { 4802 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4803 for (ll=0; ll<sbs; ll++){ 4804 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4805 for (l=0; l<ncols; l++){ 4806 *bufJ++ = cols[l]; 4807 } 4808 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4809 } 4810 } 4811 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4812 } 4813 4814 /* recvs and sends of j-array are completed */ 4815 i = nrecvs; 4816 while (i--) { 4817 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4818 } 4819 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4820 } else if (scall == MAT_REUSE_MATRIX){ 4821 sstartsj = *startsj; 4822 rstartsj = *startsj_r; 4823 bufa = *bufa_ptr; 4824 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4825 b_otha = b_oth->a; 4826 } else { 4827 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4828 } 4829 4830 /* a-array */ 4831 /*---------*/ 4832 /* post receives of a-array */ 4833 for (i=0; i<nrecvs; i++){ 4834 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4835 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4836 } 4837 4838 /* pack the outgoing message a-array */ 4839 k = 0; 4840 for (i=0; i<nsends; i++){ 4841 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4842 bufA = bufa+sstartsj[i]; 4843 for (j=0; j<nrows; j++) { 4844 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4845 for (ll=0; ll<sbs; ll++){ 4846 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4847 for (l=0; l<ncols; l++){ 4848 *bufA++ = vals[l]; 4849 } 4850 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4851 } 4852 } 4853 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4854 } 4855 /* recvs and sends of a-array are completed */ 4856 i = nrecvs; 4857 while (i--) { 4858 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4859 } 4860 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4861 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4862 4863 if (scall == MAT_INITIAL_MATRIX){ 4864 /* put together the new matrix */ 4865 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4866 4867 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4868 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4869 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4870 b_oth->free_a = PETSC_TRUE; 4871 b_oth->free_ij = PETSC_TRUE; 4872 b_oth->nonew = 0; 4873 4874 ierr = PetscFree(bufj);CHKERRQ(ierr); 4875 if (!startsj || !bufa_ptr){ 4876 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4877 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4878 } else { 4879 *startsj = sstartsj; 4880 *startsj_r = rstartsj; 4881 *bufa_ptr = bufa; 4882 } 4883 } 4884 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4885 PetscFunctionReturn(0); 4886 } 4887 4888 #undef __FUNCT__ 4889 #define __FUNCT__ "MatGetCommunicationStructs" 4890 /*@C 4891 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4892 4893 Not Collective 4894 4895 Input Parameters: 4896 . A - The matrix in mpiaij format 4897 4898 Output Parameter: 4899 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4900 . colmap - A map from global column index to local index into lvec 4901 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4902 4903 Level: developer 4904 4905 @*/ 4906 #if defined (PETSC_USE_CTABLE) 4907 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4908 #else 4909 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4910 #endif 4911 { 4912 Mat_MPIAIJ *a; 4913 4914 PetscFunctionBegin; 4915 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 4916 PetscValidPointer(lvec, 2) 4917 PetscValidPointer(colmap, 3) 4918 PetscValidPointer(multScatter, 4) 4919 a = (Mat_MPIAIJ *) A->data; 4920 if (lvec) *lvec = a->lvec; 4921 if (colmap) *colmap = a->colmap; 4922 if (multScatter) *multScatter = a->Mvctx; 4923 PetscFunctionReturn(0); 4924 } 4925 4926 EXTERN_C_BEGIN 4927 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*); 4928 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*); 4929 EXTERN_C_END 4930 4931 #undef __FUNCT__ 4932 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4933 /* 4934 Computes (B'*A')' since computing B*A directly is untenable 4935 4936 n p p 4937 ( ) ( ) ( ) 4938 m ( A ) * n ( B ) = m ( C ) 4939 ( ) ( ) ( ) 4940 4941 */ 4942 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4943 { 4944 PetscErrorCode ierr; 4945 Mat At,Bt,Ct; 4946 4947 PetscFunctionBegin; 4948 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4949 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4950 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4951 ierr = MatDestroy(At);CHKERRQ(ierr); 4952 ierr = MatDestroy(Bt);CHKERRQ(ierr); 4953 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4954 ierr = MatDestroy(Ct);CHKERRQ(ierr); 4955 PetscFunctionReturn(0); 4956 } 4957 4958 #undef __FUNCT__ 4959 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4960 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4961 { 4962 PetscErrorCode ierr; 4963 PetscInt m=A->rmap->n,n=B->cmap->n; 4964 Mat Cmat; 4965 4966 PetscFunctionBegin; 4967 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4968 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 4969 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4970 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4971 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 4972 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4973 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4974 *C = Cmat; 4975 PetscFunctionReturn(0); 4976 } 4977 4978 /* ----------------------------------------------------------------*/ 4979 #undef __FUNCT__ 4980 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4981 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4982 { 4983 PetscErrorCode ierr; 4984 4985 PetscFunctionBegin; 4986 if (scall == MAT_INITIAL_MATRIX){ 4987 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4988 } 4989 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4990 PetscFunctionReturn(0); 4991 } 4992 4993 EXTERN_C_BEGIN 4994 #if defined(PETSC_HAVE_MUMPS) 4995 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*); 4996 #endif 4997 #if defined(PETSC_HAVE_PASTIX) 4998 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 4999 #endif 5000 #if defined(PETSC_HAVE_SUPERLU_DIST) 5001 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5002 #endif 5003 #if defined(PETSC_HAVE_SPOOLES) 5004 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5005 #endif 5006 EXTERN_C_END 5007 5008 /*MC 5009 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5010 5011 Options Database Keys: 5012 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5013 5014 Level: beginner 5015 5016 .seealso: MatCreateMPIAIJ() 5017 M*/ 5018 5019 EXTERN_C_BEGIN 5020 #undef __FUNCT__ 5021 #define __FUNCT__ "MatCreate_MPIAIJ" 5022 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5023 { 5024 Mat_MPIAIJ *b; 5025 PetscErrorCode ierr; 5026 PetscMPIInt size; 5027 5028 PetscFunctionBegin; 5029 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5030 5031 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5032 B->data = (void*)b; 5033 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5034 B->rmap->bs = 1; 5035 B->assembled = PETSC_FALSE; 5036 B->mapping = 0; 5037 5038 B->insertmode = NOT_SET_VALUES; 5039 b->size = size; 5040 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5041 5042 /* build cache for off array entries formed */ 5043 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5044 b->donotstash = PETSC_FALSE; 5045 b->colmap = 0; 5046 b->garray = 0; 5047 b->roworiented = PETSC_TRUE; 5048 5049 /* stuff used for matrix vector multiply */ 5050 b->lvec = PETSC_NULL; 5051 b->Mvctx = PETSC_NULL; 5052 5053 /* stuff for MatGetRow() */ 5054 b->rowindices = 0; 5055 b->rowvalues = 0; 5056 b->getrowactive = PETSC_FALSE; 5057 5058 #if defined(PETSC_HAVE_SPOOLES) 5059 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5060 "MatGetFactor_mpiaij_spooles", 5061 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5062 #endif 5063 #if defined(PETSC_HAVE_MUMPS) 5064 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5065 "MatGetFactor_mpiaij_mumps", 5066 MatGetFactor_mpiaij_mumps);CHKERRQ(ierr); 5067 #endif 5068 #if defined(PETSC_HAVE_PASTIX) 5069 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5070 "MatGetFactor_mpiaij_pastix", 5071 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5072 #endif 5073 #if defined(PETSC_HAVE_SUPERLU_DIST) 5074 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5075 "MatGetFactor_mpiaij_superlu_dist", 5076 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5077 #endif 5078 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5079 "MatStoreValues_MPIAIJ", 5080 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5081 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5082 "MatRetrieveValues_MPIAIJ", 5083 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5084 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5085 "MatGetDiagonalBlock_MPIAIJ", 5086 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5087 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5088 "MatIsTranspose_MPIAIJ", 5089 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5090 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5091 "MatMPIAIJSetPreallocation_MPIAIJ", 5092 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5093 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5094 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5095 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5096 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5097 "MatDiagonalScaleLocal_MPIAIJ", 5098 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5099 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C", 5100 "MatConvert_MPIAIJ_MPICSRPERM", 5101 MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr); 5102 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C", 5103 "MatConvert_MPIAIJ_MPICRL", 5104 MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr); 5105 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5106 "MatConvert_MPIAIJ_MPISBAIJ", 5107 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5108 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5109 "MatMatMult_MPIDense_MPIAIJ", 5110 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5111 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5112 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5113 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5114 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5115 "MatMatMultNumeric_MPIDense_MPIAIJ", 5116 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5117 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5118 PetscFunctionReturn(0); 5119 } 5120 EXTERN_C_END 5121 5122 #undef __FUNCT__ 5123 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5124 /*@ 5125 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5126 and "off-diagonal" part of the matrix in CSR format. 5127 5128 Collective on MPI_Comm 5129 5130 Input Parameters: 5131 + comm - MPI communicator 5132 . m - number of local rows (Cannot be PETSC_DECIDE) 5133 . n - This value should be the same as the local size used in creating the 5134 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5135 calculated if N is given) For square matrices n is almost always m. 5136 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5137 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5138 . i - row indices for "diagonal" portion of matrix 5139 . j - column indices 5140 . a - matrix values 5141 . oi - row indices for "off-diagonal" portion of matrix 5142 . oj - column indices 5143 - oa - matrix values 5144 5145 Output Parameter: 5146 . mat - the matrix 5147 5148 Level: advanced 5149 5150 Notes: 5151 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5152 5153 The i and j indices are 0 based 5154 5155 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5156 5157 This sets local rows and cannot be used to set off-processor values. 5158 5159 You cannot later use MatSetValues() to change values in this matrix. 5160 5161 .keywords: matrix, aij, compressed row, sparse, parallel 5162 5163 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5164 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5165 @*/ 5166 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5167 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5168 { 5169 PetscErrorCode ierr; 5170 Mat_MPIAIJ *maij; 5171 5172 PetscFunctionBegin; 5173 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5174 if (i[0]) { 5175 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5176 } 5177 if (oi[0]) { 5178 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5179 } 5180 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5181 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5182 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5183 maij = (Mat_MPIAIJ*) (*mat)->data; 5184 maij->donotstash = PETSC_TRUE; 5185 (*mat)->preallocated = PETSC_TRUE; 5186 5187 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5188 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5189 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5190 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5191 5192 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5193 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5194 5195 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5196 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5197 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5198 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5199 5200 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5201 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5202 PetscFunctionReturn(0); 5203 } 5204 5205 /* 5206 Special version for direct calls from Fortran 5207 */ 5208 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5209 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5210 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5211 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5212 #endif 5213 5214 /* Change these macros so can be used in void function */ 5215 #undef CHKERRQ 5216 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5217 #undef SETERRQ2 5218 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5219 #undef SETERRQ 5220 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr) 5221 5222 EXTERN_C_BEGIN 5223 #undef __FUNCT__ 5224 #define __FUNCT__ "matsetvaluesmpiaij_" 5225 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5226 { 5227 Mat mat = *mmat; 5228 PetscInt m = *mm, n = *mn; 5229 InsertMode addv = *maddv; 5230 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5231 PetscScalar value; 5232 PetscErrorCode ierr; 5233 5234 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5235 if (mat->insertmode == NOT_SET_VALUES) { 5236 mat->insertmode = addv; 5237 } 5238 #if defined(PETSC_USE_DEBUG) 5239 else if (mat->insertmode != addv) { 5240 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5241 } 5242 #endif 5243 { 5244 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5245 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5246 PetscTruth roworiented = aij->roworiented; 5247 5248 /* Some Variables required in the macro */ 5249 Mat A = aij->A; 5250 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5251 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5252 MatScalar *aa = a->a; 5253 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5254 Mat B = aij->B; 5255 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5256 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5257 MatScalar *ba = b->a; 5258 5259 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5260 PetscInt nonew = a->nonew; 5261 MatScalar *ap1,*ap2; 5262 5263 PetscFunctionBegin; 5264 for (i=0; i<m; i++) { 5265 if (im[i] < 0) continue; 5266 #if defined(PETSC_USE_DEBUG) 5267 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5268 #endif 5269 if (im[i] >= rstart && im[i] < rend) { 5270 row = im[i] - rstart; 5271 lastcol1 = -1; 5272 rp1 = aj + ai[row]; 5273 ap1 = aa + ai[row]; 5274 rmax1 = aimax[row]; 5275 nrow1 = ailen[row]; 5276 low1 = 0; 5277 high1 = nrow1; 5278 lastcol2 = -1; 5279 rp2 = bj + bi[row]; 5280 ap2 = ba + bi[row]; 5281 rmax2 = bimax[row]; 5282 nrow2 = bilen[row]; 5283 low2 = 0; 5284 high2 = nrow2; 5285 5286 for (j=0; j<n; j++) { 5287 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5288 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5289 if (in[j] >= cstart && in[j] < cend){ 5290 col = in[j] - cstart; 5291 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5292 } else if (in[j] < 0) continue; 5293 #if defined(PETSC_USE_DEBUG) 5294 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5295 #endif 5296 else { 5297 if (mat->was_assembled) { 5298 if (!aij->colmap) { 5299 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5300 } 5301 #if defined (PETSC_USE_CTABLE) 5302 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5303 col--; 5304 #else 5305 col = aij->colmap[in[j]] - 1; 5306 #endif 5307 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5308 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5309 col = in[j]; 5310 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5311 B = aij->B; 5312 b = (Mat_SeqAIJ*)B->data; 5313 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5314 rp2 = bj + bi[row]; 5315 ap2 = ba + bi[row]; 5316 rmax2 = bimax[row]; 5317 nrow2 = bilen[row]; 5318 low2 = 0; 5319 high2 = nrow2; 5320 bm = aij->B->rmap->n; 5321 ba = b->a; 5322 } 5323 } else col = in[j]; 5324 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5325 } 5326 } 5327 } else { 5328 if (!aij->donotstash) { 5329 if (roworiented) { 5330 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5331 } else { 5332 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5333 } 5334 } 5335 } 5336 }} 5337 PetscFunctionReturnVoid(); 5338 } 5339 EXTERN_C_END 5340 5341