1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "petscblaslapack.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatDistribute_MPIAIJ" 8 /* 9 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 10 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 11 12 Only for square matrices 13 */ 14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 15 { 16 PetscMPIInt rank,size; 17 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 18 PetscErrorCode ierr; 19 Mat mat; 20 Mat_SeqAIJ *gmata; 21 PetscMPIInt tag; 22 MPI_Status status; 23 PetscBool aij; 24 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 25 26 PetscFunctionBegin; 27 CHKMEMQ; 28 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 29 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 30 if (!rank) { 31 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 32 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 33 } 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 36 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 37 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 38 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 39 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 40 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 41 rowners[0] = 0; 42 for (i=2; i<=size; i++) { 43 rowners[i] += rowners[i-1]; 44 } 45 rstart = rowners[rank]; 46 rend = rowners[rank+1]; 47 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 48 if (!rank) { 49 gmata = (Mat_SeqAIJ*) gmat->data; 50 /* send row lengths to all processors */ 51 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 52 for (i=1; i<size; i++) { 53 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 54 } 55 /* determine number diagonal and off-diagonal counts */ 56 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 58 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 59 jj = 0; 60 for (i=0; i<m; i++) { 61 for (j=0; j<dlens[i]; j++) { 62 if (gmata->j[jj] < rstart) ld[i]++; 63 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 64 jj++; 65 } 66 } 67 /* send column indices to other processes */ 68 for (i=1; i<size; i++) { 69 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 70 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 71 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 72 } 73 74 /* send numerical values to other processes */ 75 for (i=1; i<size; i++) { 76 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 77 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 78 } 79 gmataa = gmata->a; 80 gmataj = gmata->j; 81 82 } else { 83 /* receive row lengths */ 84 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 85 /* receive column indices */ 86 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 87 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 88 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 89 /* determine number diagonal and off-diagonal counts */ 90 ierr = 
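/*
     Illustrative sketch (hypothetical caller, not part of this file): how MatDistribute_MPIAIJ()
     is typically driven.  The global matrix gA is a square MATSEQAIJ that exists on rank 0 only
     (passing PETSC_NULL on the other ranks is an assumption of this sketch), m is the number of
     rows this process is to own, and the sum of m over all ranks must equal the global size of gA.

       Mat gA = PETSC_NULL,A;
       if (!rank) {
         ... assemble gA as a square MATSEQAIJ on PETSC_COMM_SELF ...
       }
       ierr = MatDistribute_MPIAIJ(comm,gA,m,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
       ... if only the numerical values on rank 0 change, the layout can be reused ...
       ierr = MatDistribute_MPIAIJ(comm,gA,m,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/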
PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 92 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 93 jj = 0; 94 for (i=0; i<m; i++) { 95 for (j=0; j<dlens[i]; j++) { 96 if (gmataj[jj] < rstart) ld[i]++; 97 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 98 jj++; 99 } 100 } 101 /* receive numerical values */ 102 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 104 } 105 /* set preallocation */ 106 for (i=0; i<m; i++) { 107 dlens[i] -= olens[i]; 108 } 109 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 110 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 111 112 for (i=0; i<m; i++) { 113 dlens[i] += olens[i]; 114 } 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 row = rstart + i; 118 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 119 cnt += dlens[i]; 120 } 121 if (rank) { 122 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 123 } 124 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 125 ierr = PetscFree(rowners);CHKERRQ(ierr); 126 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 127 *inmat = mat; 128 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 129 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 130 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 131 mat = *inmat; 132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 133 if (!rank) { 134 /* send numerical values to other processes */ 135 gmata = (Mat_SeqAIJ*) gmat->data; 136 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 137 gmataa = gmata->a; 138 for (i=1; i<size; i++) { 139 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 140 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 141 } 142 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 143 } else { 144 /* receive numerical values from process 0*/ 145 nz = Ad->nz + Ao->nz; 146 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 147 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 148 } 149 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 150 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 151 ad = Ad->a; 152 ao = Ao->a; 153 if (mat->rmap->n) { 154 i = 0; 155 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 156 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 157 } 158 for (i=1; i<mat->rmap->n; i++) { 159 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 160 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 161 } 162 i--; 163 if (mat->rmap->n) { 164 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 165 } 166 if (rank) { 167 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 168 } 169 } 170 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 CHKMEMQ; 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 Local utility routine that creates a mapping from the global 
column 178 number to the local number in the off-diagonal part of the local 179 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 180 a slightly higher hash table cost; without it it is not scalable (each processor 181 has an order N integer array but is fast to acess. 182 */ 183 #undef __FUNCT__ 184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 188 PetscErrorCode ierr; 189 PetscInt n = aij->B->cmap->n,i; 190 191 PetscFunctionBegin; 192 #if defined (PETSC_USE_CTABLE) 193 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 194 for (i=0; i<n; i++){ 195 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 196 } 197 #else 198 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 199 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 200 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 201 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 202 #endif 203 PetscFunctionReturn(0); 204 } 205 206 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 207 { \ 208 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 209 lastcol1 = col;\ 210 while (high1-low1 > 5) { \ 211 t = (low1+high1)/2; \ 212 if (rp1[t] > col) high1 = t; \ 213 else low1 = t; \ 214 } \ 215 for (_i=low1; _i<high1; _i++) { \ 216 if (rp1[_i] > col) break; \ 217 if (rp1[_i] == col) { \ 218 if (addv == ADD_VALUES) ap1[_i] += value; \ 219 else ap1[_i] = value; \ 220 goto a_noinsert; \ 221 } \ 222 } \ 223 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 224 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 225 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 226 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 227 N = nrow1++ - 1; a->nz++; high1++; \ 228 /* shift up all the later entries in this row */ \ 229 for (ii=N; ii>=_i; ii--) { \ 230 rp1[ii+1] = rp1[ii]; \ 231 ap1[ii+1] = ap1[ii]; \ 232 } \ 233 rp1[_i] = col; \ 234 ap1[_i] = value; \ 235 a_noinsert: ; \ 236 ailen[row] = nrow1; \ 237 } 238 239 240 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 241 { \ 242 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 243 lastcol2 = col;\ 244 while (high2-low2 > 5) { \ 245 t = (low2+high2)/2; \ 246 if (rp2[t] > col) high2 = t; \ 247 else low2 = t; \ 248 } \ 249 for (_i=low2; _i<high2; _i++) { \ 250 if (rp2[_i] > col) break; \ 251 if (rp2[_i] == col) { \ 252 if (addv == ADD_VALUES) ap2[_i] += value; \ 253 else ap2[_i] = value; \ 254 goto b_noinsert; \ 255 } \ 256 } \ 257 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 258 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 259 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 260 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 261 N = nrow2++ - 1; b->nz++; high2++; \ 262 /* shift up all the later entries in this row */ \ 263 for (ii=N; ii>=_i; ii--) { \ 264 rp2[ii+1] = rp2[ii]; \ 265 ap2[ii+1] = ap2[ii]; \ 266 } \ 267 rp2[_i] = col; \ 268 ap2[_i] = value; \ 269 b_noinsert: ; \ 270 bilen[row] = nrow2; \ 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 275 PetscErrorCode 
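/*
     Sketch of how the colmap built by CreateColmap_MPIAIJ_Private() is consulted later in this
     file (see MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()).  A global column gcol is mapped
     to a local column lcol of the off-diagonal block B; entries are stored shifted by one so
     that a lookup result of 0 (hence lcol = -1) means "this global column is not present in B":

       #if defined(PETSC_USE_CTABLE)
         ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
         lcol--;
       #else
         lcol = aij->colmap[gcol] - 1;
       #endif
*/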
MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 276 { 277 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 278 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 279 PetscErrorCode ierr; 280 PetscInt l,*garray = mat->garray,diag; 281 282 PetscFunctionBegin; 283 /* code only works for square matrices A */ 284 285 /* find size of row to the left of the diagonal part */ 286 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 287 row = row - diag; 288 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 289 if (garray[b->j[b->i[row]+l]] > diag) break; 290 } 291 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 292 293 /* diagonal part */ 294 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 295 296 /* right of diagonal part */ 297 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 298 PetscFunctionReturn(0); 299 } 300 301 #undef __FUNCT__ 302 #define __FUNCT__ "MatSetValues_MPIAIJ" 303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 306 PetscScalar value; 307 PetscErrorCode ierr; 308 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 309 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 310 PetscBool roworiented = aij->roworiented; 311 312 /* Some Variables required in the macro */ 313 Mat A = aij->A; 314 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 315 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 316 MatScalar *aa = a->a; 317 PetscBool ignorezeroentries = a->ignorezeroentries; 318 Mat B = aij->B; 319 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 320 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 321 MatScalar *ba = b->a; 322 323 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 324 PetscInt nonew = a->nonew; 325 MatScalar *ap1,*ap2; 326 327 PetscFunctionBegin; 328 if (v) PetscValidScalarPointer(v,6); 329 for (i=0; i<m; i++) { 330 if (im[i] < 0) continue; 331 #if defined(PETSC_USE_DEBUG) 332 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 333 #endif 334 if (im[i] >= rstart && im[i] < rend) { 335 row = im[i] - rstart; 336 lastcol1 = -1; 337 rp1 = aj + ai[row]; 338 ap1 = aa + ai[row]; 339 rmax1 = aimax[row]; 340 nrow1 = ailen[row]; 341 low1 = 0; 342 high1 = nrow1; 343 lastcol2 = -1; 344 rp2 = bj + bi[row]; 345 ap2 = ba + bi[row]; 346 rmax2 = bimax[row]; 347 nrow2 = bilen[row]; 348 low2 = 0; 349 high2 = nrow2; 350 351 for (j=0; j<n; j++) { 352 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 353 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 354 if (in[j] >= cstart && in[j] < cend){ 355 col = in[j] - cstart; 356 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 357 } else if (in[j] < 0) continue; 358 #if defined(PETSC_USE_DEBUG) 359 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 360 #endif 361 else { 362 if (mat->was_assembled) { 363 if (!aij->colmap) { 364 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 365 } 366 #if defined (PETSC_USE_CTABLE) 367 ierr = 
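/*
     Worked illustration of the ordering MatSetValuesRow_MPIAIJ() expects (hypothetical numbers):
     on the process owning rows 4..7 of a square matrix (so diagonal-block columns 4..7), a row
     whose existing nonzeros sit in global columns 1, 4, 6 and 9 must be passed

       v = { a(row,1), a(row,4), a(row,6), a(row,9) }

     i.e. the whole row in increasing global column order; a(row,1) lands in the left part of the
     off-diagonal block B, a(row,4) and a(row,6) in the diagonal block A, and a(row,9) in the
     right part of B.  The nonzero pattern must already exist; the routine only overwrites values.
*/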
PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 368 col--; 369 #else 370 col = aij->colmap[in[j]] - 1; 371 #endif 372 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 373 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 374 col = in[j]; 375 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 376 B = aij->B; 377 b = (Mat_SeqAIJ*)B->data; 378 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 379 rp2 = bj + bi[row]; 380 ap2 = ba + bi[row]; 381 rmax2 = bimax[row]; 382 nrow2 = bilen[row]; 383 low2 = 0; 384 high2 = nrow2; 385 bm = aij->B->rmap->n; 386 ba = b->a; 387 } 388 } else col = in[j]; 389 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 390 } 391 } 392 } else { 393 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 394 if (!aij->donotstash) { 395 if (roworiented) { 396 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 397 } else { 398 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 399 } 400 } 401 } 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatGetValues_MPIAIJ" 408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 413 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 414 415 PetscFunctionBegin; 416 for (i=0; i<m; i++) { 417 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 418 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 419 if (idxm[i] >= rstart && idxm[i] < rend) { 420 row = idxm[i] - rstart; 421 for (j=0; j<n; j++) { 422 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 423 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 424 if (idxn[j] >= cstart && idxn[j] < cend){ 425 col = idxn[j] - cstart; 426 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 427 } else { 428 if (!aij->colmap) { 429 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 430 } 431 #if defined (PETSC_USE_CTABLE) 432 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 433 col --; 434 #else 435 col = aij->colmap[idxn[j]] - 1; 436 #endif 437 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 438 else { 439 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 440 } 441 } 442 } 443 } else { 444 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 445 } 446 } 447 PetscFunctionReturn(0); 448 } 449 450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscErrorCode ierr; 458 PetscInt nstash,reallocs; 459 InsertMode addv; 460 461 PetscFunctionBegin; 462 if (aij->donotstash || mat->nooffprocentries) { 463 
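/*
     Caller-level sketch of the flow that exercises the stash logic above (hypothetical indices):
     any process may set entries in rows it does not own; such entries are kept in mat->stash by
     MatSetValues_MPIAIJ() and only communicated during assembly.

       ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   <-- stash is scattered to the owning processes
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);     <-- received entries are inserted locally

     Setting MAT_IGNORE_OFF_PROC_ENTRIES drops such entries instead, and MAT_NO_OFF_PROC_ENTRIES
     makes them an error (see the SETERRQ above).
*/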
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
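
     A minimal sketch of that check, assuming was_assembled is 0 or 1 on every process: the
     MPI_PROD reduction below yields 1 only if no process has disassembled, so

       if (was_assembled && !other_disassembled) DisAssemble_MPIAIJ(mat);

     makes this process disassemble as well whenever any other process did (because it had to
     grow new off-diagonal columns), so that MatSetUpMultiply_MPIAIJ() can rebuild a consistent
     scatter for everyone.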
*/ 516 /* 517 if nonzero structure of submatrix B cannot change then we know that 518 no processor disassembled thus we can skip this stuff 519 */ 520 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 521 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 522 if (mat->was_assembled && !other_disassembled) { 523 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 524 } 525 } 526 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 527 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 528 } 529 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 530 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 531 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 532 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 533 534 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 535 aij->rowvalues = 0; 536 537 /* used by MatAXPY() */ 538 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 539 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 540 541 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;} 542 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 549 { 550 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 555 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatZeroRows_MPIAIJ" 561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 562 { 563 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 564 PetscErrorCode ierr; 565 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 566 PetscInt i,*owners = A->rmap->range; 567 PetscInt *nprocs,j,idx,nsends,row; 568 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 569 PetscInt *rvalues,count,base,slen,*source; 570 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 571 MPI_Comm comm = ((PetscObject)A)->comm; 572 MPI_Request *send_waits,*recv_waits; 573 MPI_Status recv_status,*send_status; 574 #if defined(PETSC_DEBUG) 575 PetscBool found = PETSC_FALSE; 576 #endif 577 578 PetscFunctionBegin; 579 /* first count number of contributors to each processor */ 580 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 581 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 582 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 583 j = 0; 584 for (i=0; i<N; i++) { 585 if (lastidx > (idx = rows[i])) j = 0; 586 lastidx = idx; 587 for (; j<size; j++) { 588 if (idx >= owners[j] && idx < owners[j+1]) { 589 nprocs[2*j]++; 590 nprocs[2*j+1] = 1; 591 owner[i] = j; 592 #if defined(PETSC_DEBUG) 593 found = PETSC_TRUE; 594 #endif 595 break; 596 } 597 } 598 #if defined(PETSC_DEBUG) 599 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 600 found = PETSC_FALSE; 601 #endif 602 } 603 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 604 605 if (A->nooffproczerorows) { 606 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 607 nrecvs = nsends; 608 nmax = N; 609 } else { 610 /* inform 
other processors of number of messages and max length*/ 611 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 612 } 613 614 /* post receives: */ 615 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 616 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 617 for (i=0; i<nrecvs; i++) { 618 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 619 } 620 621 /* do sends: 622 1) starts[i] gives the starting index in svalues for stuff going to 623 the ith processor 624 */ 625 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 626 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 627 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 628 starts[0] = 0; 629 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 630 for (i=0; i<N; i++) { 631 svalues[starts[owner[i]]++] = rows[i]; 632 } 633 634 starts[0] = 0; 635 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 636 count = 0; 637 for (i=0; i<size; i++) { 638 if (nprocs[2*i+1]) { 639 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 640 } 641 } 642 ierr = PetscFree(starts);CHKERRQ(ierr); 643 644 base = owners[rank]; 645 646 /* wait on receives */ 647 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 648 count = nrecvs; slen = 0; 649 while (count) { 650 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 651 /* unpack receives into our local space */ 652 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 653 source[imdex] = recv_status.MPI_SOURCE; 654 lens[imdex] = n; 655 slen += n; 656 count--; 657 } 658 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 659 660 /* move the data into the send scatter */ 661 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 662 count = 0; 663 for (i=0; i<nrecvs; i++) { 664 values = rvalues + i*nmax; 665 for (j=0; j<lens[i]; j++) { 666 lrows[count++] = values[j] - base; 667 } 668 } 669 ierr = PetscFree(rvalues);CHKERRQ(ierr); 670 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 671 ierr = PetscFree(owner);CHKERRQ(ierr); 672 ierr = PetscFree(nprocs);CHKERRQ(ierr); 673 674 /* actually zap the local rows */ 675 /* 676 Zero the required rows. If the "diagonal block" of the matrix 677 is square and the user wishes to set the diagonal we use separate 678 code so that MatSetValues() is not called for each diagonal allocating 679 new memory, thus calling lots of mallocs and slowing things down. 
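
     A caller-level sketch (hypothetical row numbers; in this version of the library MatZeroRows()
     takes the rows, which are global indices and may include rows owned by other processes, plus
     the value to place on the diagonal of each zeroed row):

       PetscInt rows[] = {0,5,17};
       ierr = MatZeroRows(A,3,rows,1.0);CHKERRQ(ierr);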
680 681 */ 682 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 683 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 684 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 685 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 686 } else if (diag != 0.0) { 687 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 688 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 689 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 690 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 691 } 692 for (i = 0; i < slen; i++) { 693 row = lrows[i] + rstart; 694 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 695 } 696 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 697 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 698 } else { 699 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 700 } 701 ierr = PetscFree(lrows);CHKERRQ(ierr); 702 703 /* wait on sends */ 704 if (nsends) { 705 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 706 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 707 ierr = PetscFree(send_status);CHKERRQ(ierr); 708 } 709 ierr = PetscFree(send_waits);CHKERRQ(ierr); 710 ierr = PetscFree(svalues);CHKERRQ(ierr); 711 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatMult_MPIAIJ" 717 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 718 { 719 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 720 PetscErrorCode ierr; 721 PetscInt nt; 722 723 PetscFunctionBegin; 724 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 725 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 726 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 728 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 729 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 730 PetscFunctionReturn(0); 731 } 732 733 #undef __FUNCT__ 734 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 735 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 736 { 737 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 738 PetscErrorCode ierr; 739 740 PetscFunctionBegin; 741 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 742 PetscFunctionReturn(0); 743 } 744 745 #undef __FUNCT__ 746 #define __FUNCT__ "MatMultAdd_MPIAIJ" 747 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 748 { 749 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 750 PetscErrorCode ierr; 751 752 PetscFunctionBegin; 753 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 754 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 755 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 756 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 757 PetscFunctionReturn(0); 758 } 759 760 #undef __FUNCT__ 761 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 762 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 763 { 764 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 765 PetscErrorCode ierr; 766 PetscBool merged; 767 768 PetscFunctionBegin; 769 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 770 /* do nondiagonal part */ 771 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 772 if (!merged) { 773 /* send it on its 
   way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
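/*
   Illustrative sketch (not compiled; the helper name and sizes are hypothetical): how the
   multiply kernels above split the work.  For y = A*x the diagonal block is applied to the
   locally owned part of x while the VecScatter brings in the ghost values needed by the
   off-diagonal block, i.e.

       y_local = A_diag * x_local + A_off * x_ghost

   with the scatter overlapped against the local product.  A caller only sees the public API:
*/
#if 0
static PetscErrorCode ExampleMatMultUsage(MPI_Comm comm)
{
  Mat            A;
  Vec            x,y;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  /* ... MatSetValues() calls would go here ... */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);  /* x has the column layout, y the row layout */
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);       /* ghost scatter overlaps the local product */
  ierr = VecDestroy(x);CHKERRQ(ierr);
  ierr = VecDestroy(y);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/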
859 #undef __FUNCT__ 860 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 861 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 862 { 863 PetscErrorCode ierr; 864 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 865 866 PetscFunctionBegin; 867 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 868 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 869 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "MatScale_MPIAIJ" 875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 876 { 877 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 878 PetscErrorCode ierr; 879 880 PetscFunctionBegin; 881 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 882 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatDestroy_MPIAIJ" 888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 889 { 890 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 891 PetscErrorCode ierr; 892 893 PetscFunctionBegin; 894 #if defined(PETSC_USE_LOG) 895 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 896 #endif 897 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 898 if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);} 899 if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);} 900 if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);} 901 #if defined (PETSC_USE_CTABLE) 902 if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);} 903 #else 904 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 905 #endif 906 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 907 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 908 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 909 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 910 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 911 ierr = PetscFree(aij);CHKERRQ(ierr); 912 913 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 914 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 915 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 916 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 917 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 918 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 919 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 920 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 921 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 922 PetscFunctionReturn(0); 923 } 924 925 #undef __FUNCT__ 926 #define __FUNCT__ "MatView_MPIAIJ_Binary" 927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 928 { 929 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 930 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 931 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 932 PetscErrorCode ierr; 933 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 934 int fd; 935 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 936 PetscInt nzmax,*column_indices,j,k,col,*garray = 
aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 937 PetscScalar *column_values; 938 PetscInt message_count,flowcontrolcount; 939 940 PetscFunctionBegin; 941 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 942 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 943 nz = A->nz + B->nz; 944 if (!rank) { 945 header[0] = MAT_FILE_CLASSID; 946 header[1] = mat->rmap->N; 947 header[2] = mat->cmap->N; 948 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 949 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 950 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 951 /* get largest number of rows any processor has */ 952 rlen = mat->rmap->n; 953 range = mat->rmap->range; 954 for (i=1; i<size; i++) { 955 rlen = PetscMax(rlen,range[i+1] - range[i]); 956 } 957 } else { 958 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 959 rlen = mat->rmap->n; 960 } 961 962 /* load up the local row counts */ 963 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 964 for (i=0; i<mat->rmap->n; i++) { 965 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 966 } 967 968 /* store the row lengths to the file */ 969 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 970 if (!rank) { 971 MPI_Status status; 972 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 973 for (i=1; i<size; i++) { 974 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 975 rlen = range[i+1] - range[i]; 976 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 977 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 978 } 979 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 980 } else { 981 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 982 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 983 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 984 } 985 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 986 987 /* load up the local column indices */ 988 nzmax = nz; /* )th processor needs space a largest processor needs */ 989 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 990 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 991 cnt = 0; 992 for (i=0; i<mat->rmap->n; i++) { 993 for (j=B->i[i]; j<B->i[i+1]; j++) { 994 if ( (col = garray[B->j[j]]) > cstart) break; 995 column_indices[cnt++] = col; 996 } 997 for (k=A->i[i]; k<A->i[i+1]; k++) { 998 column_indices[cnt++] = A->j[k] + cstart; 999 } 1000 for (; j<B->i[i+1]; j++) { 1001 column_indices[cnt++] = garray[B->j[j]]; 1002 } 1003 } 1004 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1005 1006 /* store the column indices to the file */ 1007 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1008 if (!rank) { 1009 MPI_Status status; 1010 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1011 for (i=1; i<size; i++) { 1012 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1013 ierr = 
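/*
     Sketch of the flow-control pattern used throughout this viewer (all calls appear in this
     routine): rank 0 drains one message per worker while the viewer throttles how many workers
     are allowed to send at once.

       rank 0:  PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
                MPI_Recv(... from rank i ...);  PetscBinaryWrite(fd,...);
                repeated for i = 1..size-1, then PetscViewerFlowControlEndMaster(viewer,message_count);
       rank i:  PetscViewerFlowControlStepWorker(viewer,rank,message_count);
                MPI_Send(... to rank 0 ...);    PetscViewerFlowControlEndWorker(viewer,message_count);
*/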
MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1014 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1015 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1016 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1017 } 1018 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1019 } else { 1020 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1021 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1022 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1023 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1024 } 1025 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1026 1027 /* load up the local column values */ 1028 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1029 cnt = 0; 1030 for (i=0; i<mat->rmap->n; i++) { 1031 for (j=B->i[i]; j<B->i[i+1]; j++) { 1032 if ( garray[B->j[j]] > cstart) break; 1033 column_values[cnt++] = B->a[j]; 1034 } 1035 for (k=A->i[i]; k<A->i[i+1]; k++) { 1036 column_values[cnt++] = A->a[k]; 1037 } 1038 for (; j<B->i[i+1]; j++) { 1039 column_values[cnt++] = B->a[j]; 1040 } 1041 } 1042 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1043 1044 /* store the column values to the file */ 1045 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1046 if (!rank) { 1047 MPI_Status status; 1048 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1049 for (i=1; i<size; i++) { 1050 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1051 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1052 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1053 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1054 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1055 } 1056 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1057 } else { 1058 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1059 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1060 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1061 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1062 } 1063 ierr = PetscFree(column_values);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1069 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1070 { 1071 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1072 PetscErrorCode ierr; 1073 PetscMPIInt rank = aij->rank,size = aij->size; 1074 PetscBool isdraw,iascii,isbinary; 1075 PetscViewer sviewer; 1076 PetscViewerFormat format; 1077 1078 PetscFunctionBegin; 1079 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1080 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1081 ierr = 
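/*
     Resulting binary file layout produced by MatView_MPIAIJ_Binary() above (all data written by
     rank 0, in this order):

       header[4]           = { MAT_FILE_CLASSID, M, N, total number of nonzeros }
       row_lengths[M]        nonzeros per global row
       column_indices[nz]    global columns, row by row, in increasing order within each row
       values[nz]            the matching numerical values

     A minimal read-back sketch (hypothetical standalone code, not used here; MatLoad() consumes
     this same layout and also handles the parallel distribution):

       ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
       ierr = PetscBinaryRead(fd,rowlens,header[1],PETSC_INT);CHKERRQ(ierr);
       ierr = PetscBinaryRead(fd,cols,header[3],PETSC_INT);CHKERRQ(ierr);
       ierr = PetscBinaryRead(fd,vals,header[3],PETSC_SCALAR);CHKERRQ(ierr);
*/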
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1082 if (iascii) { 1083 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1084 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1085 MatInfo info; 1086 PetscBool inodes; 1087 1088 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1089 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1090 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1091 if (!inodes) { 1092 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1093 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1094 } else { 1095 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1096 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1097 } 1098 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1099 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1100 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1101 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1102 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1103 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1104 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1107 PetscInt inodecount,inodelimit,*inodes; 1108 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1109 if (inodes) { 1110 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1111 } else { 1112 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1113 } 1114 PetscFunctionReturn(0); 1115 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1116 PetscFunctionReturn(0); 1117 } 1118 } else if (isbinary) { 1119 if (size == 1) { 1120 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1121 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1122 } else { 1123 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1124 } 1125 PetscFunctionReturn(0); 1126 } else if (isdraw) { 1127 PetscDraw draw; 1128 PetscBool isnull; 1129 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1130 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1131 } 1132 1133 if (size == 1) { 1134 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1135 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1136 } else { 1137 /* assemble the entire matrix onto first processor. 
*/ 1138 Mat A; 1139 Mat_SeqAIJ *Aloc; 1140 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1141 MatScalar *a; 1142 1143 if (mat->rmap->N > 1024) { 1144 PetscBool flg = PETSC_FALSE; 1145 1146 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1147 if (!flg) { 1148 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1149 } 1150 } 1151 1152 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1153 if (!rank) { 1154 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1155 } else { 1156 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1157 } 1158 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1159 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1160 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1161 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1162 1163 /* copy over the A part */ 1164 Aloc = (Mat_SeqAIJ*)aij->A->data; 1165 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1166 row = mat->rmap->rstart; 1167 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1168 for (i=0; i<m; i++) { 1169 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1170 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1171 } 1172 aj = Aloc->j; 1173 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1174 1175 /* copy over the B part */ 1176 Aloc = (Mat_SeqAIJ*)aij->B->data; 1177 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1178 row = mat->rmap->rstart; 1179 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1180 ct = cols; 1181 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1182 for (i=0; i<m; i++) { 1183 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1184 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1185 } 1186 ierr = PetscFree(ct);CHKERRQ(ierr); 1187 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1188 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1189 /* 1190 Everyone has to call to draw the matrix since the graphics waits are 1191 synchronized across all processors that share the PetscDraw object 1192 */ 1193 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1194 if (!rank) { 1195 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1196 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1197 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1198 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1199 } 1200 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1201 ierr = MatDestroy(A);CHKERRQ(ierr); 1202 } 1203 PetscFunctionReturn(0); 1204 } 1205 1206 #undef __FUNCT__ 1207 #define __FUNCT__ "MatView_MPIAIJ" 1208 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1209 { 1210 PetscErrorCode ierr; 1211 PetscBool iascii,isdraw,issocket,isbinary; 1212 1213 PetscFunctionBegin; 1214 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1215 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1216 ierr = 
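/*
     Caller-level sketch (hypothetical): how the ASCII/draw path above is typically reached.

       ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

     For matrices with more than 1024 global rows the ASCII path refuses to gather everything
     onto rank 0 unless -mat_ascii_output_large is given; the binary viewer
     (MatView_MPIAIJ_Binary()) is the scalable alternative.
*/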
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1217 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1218 if (iascii || isdraw || isbinary || issocket) { 1219 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1220 } else { 1221 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1222 } 1223 PetscFunctionReturn(0); 1224 } 1225 1226 #undef __FUNCT__ 1227 #define __FUNCT__ "MatSOR_MPIAIJ" 1228 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1229 { 1230 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1231 PetscErrorCode ierr; 1232 Vec bb1 = 0; 1233 PetscBool hasop; 1234 1235 PetscFunctionBegin; 1236 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1237 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1238 } 1239 1240 if (flag == SOR_APPLY_UPPER) { 1241 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1246 if (flag & SOR_ZERO_INITIAL_GUESS) { 1247 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1248 its--; 1249 } 1250 1251 while (its--) { 1252 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1253 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1254 1255 /* update rhs: bb1 = bb - B*x */ 1256 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1257 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1258 1259 /* local sweep */ 1260 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1261 } 1262 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1263 if (flag & SOR_ZERO_INITIAL_GUESS) { 1264 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1265 its--; 1266 } 1267 while (its--) { 1268 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1270 1271 /* update rhs: bb1 = bb - B*x */ 1272 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1273 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1274 1275 /* local sweep */ 1276 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1277 } 1278 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1279 if (flag & SOR_ZERO_INITIAL_GUESS) { 1280 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1281 its--; 1282 } 1283 while (its--) { 1284 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1285 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1286 1287 /* update rhs: bb1 = bb - B*x */ 1288 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1289 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1290 1291 /* local sweep */ 1292 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1293 } 1294 } else if (flag & SOR_EISENSTAT) { 1295 Vec xx1; 1296 1297 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1298 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1299 1300 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1301 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1302 if (!mat->diag) { 1303 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1304 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1305 } 1306 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1307 if (hasop) { 1308 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1309 } else { 1310 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1311 } 1312 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1313 1314 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1315 1316 /* local sweep */ 1317 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1318 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1319 ierr = VecDestroy(xx1);CHKERRQ(ierr); 1320 } else { 1321 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1322 } 1323 1324 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 1325 PetscFunctionReturn(0); 1326 } 1327 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "MatPermute_MPIAIJ" 1330 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1331 { 1332 MPI_Comm comm,pcomm; 1333 PetscInt first,local_size,nrows; 1334 const PetscInt *rows; 1335 PetscMPIInt size; 1336 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1337 PetscErrorCode ierr; 1338 1339 PetscFunctionBegin; 1340 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1341 /* make a collective version of 'rowp' */ 1342 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1343 if (pcomm==comm) { 1344 crowp = rowp; 1345 } else { 1346 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1347 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1348 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 1349 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1350 } 1351 /* collect the global row permutation and invert it */ 1352 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1353 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1354 if (pcomm!=comm) { 1355 ierr = ISDestroy(crowp);CHKERRQ(ierr); 1356 } 1357 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1358 /* get the local target indices */ 1359 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1360 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1361 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1362 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1363 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1364 ierr = ISDestroy(irowp);CHKERRQ(ierr); 1365 /* the column permutation is so much easier; 1366 make a local version of 'colp' and invert it */ 1367 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1368 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1369 if (size==1) { 1370 lcolp = colp; 1371 } else { 1372 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1373 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1374 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 1375 } 1376 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1377 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1378 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1379 if (size>1) { 1380 ierr = 
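/*
     Mechanism sketch summarizing MatPermute_MPIAIJ() above: the parallel permutation is reduced
     to a submatrix extraction.

       growp = ISAllGather(rowp)                      every process sees the whole row permutation
       irowp = ISInvertPermutation(growp)
       lrowp = the locally owned slice of irowp (rows first .. first+local_size-1)
       icolp = ISInvertPermutation(local copy of colp)
       *B    = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,...)
*/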
ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1381 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 1382 } 1383 /* now we just get the submatrix */ 1384 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1385 /* clean up */ 1386 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 1387 ierr = ISDestroy(icolp);CHKERRQ(ierr); 1388 PetscFunctionReturn(0); 1389 } 1390 1391 #undef __FUNCT__ 1392 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1393 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1394 { 1395 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1396 Mat A = mat->A,B = mat->B; 1397 PetscErrorCode ierr; 1398 PetscReal isend[5],irecv[5]; 1399 1400 PetscFunctionBegin; 1401 info->block_size = 1.0; 1402 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1403 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1404 isend[3] = info->memory; isend[4] = info->mallocs; 1405 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1406 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1407 isend[3] += info->memory; isend[4] += info->mallocs; 1408 if (flag == MAT_LOCAL) { 1409 info->nz_used = isend[0]; 1410 info->nz_allocated = isend[1]; 1411 info->nz_unneeded = isend[2]; 1412 info->memory = isend[3]; 1413 info->mallocs = isend[4]; 1414 } else if (flag == MAT_GLOBAL_MAX) { 1415 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1416 info->nz_used = irecv[0]; 1417 info->nz_allocated = irecv[1]; 1418 info->nz_unneeded = irecv[2]; 1419 info->memory = irecv[3]; 1420 info->mallocs = irecv[4]; 1421 } else if (flag == MAT_GLOBAL_SUM) { 1422 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1423 info->nz_used = irecv[0]; 1424 info->nz_allocated = irecv[1]; 1425 info->nz_unneeded = irecv[2]; 1426 info->memory = irecv[3]; 1427 info->mallocs = irecv[4]; 1428 } 1429 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1430 info->fill_ratio_needed = 0; 1431 info->factor_mallocs = 0; 1432 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatSetOption_MPIAIJ" 1438 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1439 { 1440 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1441 PetscErrorCode ierr; 1442 1443 PetscFunctionBegin; 1444 switch (op) { 1445 case MAT_NEW_NONZERO_LOCATIONS: 1446 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1447 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1448 case MAT_KEEP_NONZERO_PATTERN: 1449 case MAT_NEW_NONZERO_LOCATION_ERR: 1450 case MAT_USE_INODES: 1451 case MAT_IGNORE_ZERO_ENTRIES: 1452 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1453 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1454 break; 1455 case MAT_ROW_ORIENTED: 1456 a->roworiented = flg; 1457 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1458 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1459 break; 1460 case MAT_NEW_DIAGONALS: 1461 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1462 break; 1463 case MAT_IGNORE_OFF_PROC_ENTRIES: 1464 a->donotstash = PETSC_TRUE; 1465 break; 1466 case MAT_SPD: 1467 A->spd_set = PETSC_TRUE; 1468 A->spd = flg; 1469 if (flg) { 1470 A->symmetric = PETSC_TRUE; 1471 A->structurally_symmetric = PETSC_TRUE; 1472 A->symmetric_set = PETSC_TRUE; 1473 A->structurally_symmetric_set = PETSC_TRUE; 1474 } 1475 break; 1476 case MAT_SYMMETRIC: 1477 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1478 break; 1479 case 
MAT_STRUCTURALLY_SYMMETRIC: 1480 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1481 break; 1482 case MAT_HERMITIAN: 1483 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1484 break; 1485 case MAT_SYMMETRY_ETERNAL: 1486 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1487 break; 1488 default: 1489 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1490 } 1491 PetscFunctionReturn(0); 1492 } 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "MatGetRow_MPIAIJ" 1496 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1497 { 1498 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1499 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1500 PetscErrorCode ierr; 1501 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1502 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1503 PetscInt *cmap,*idx_p; 1504 1505 PetscFunctionBegin; 1506 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1507 mat->getrowactive = PETSC_TRUE; 1508 1509 if (!mat->rowvalues && (idx || v)) { 1510 /* 1511 allocate enough space to hold information from the longest row. 1512 */ 1513 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1514 PetscInt max = 1,tmp; 1515 for (i=0; i<matin->rmap->n; i++) { 1516 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1517 if (max < tmp) { max = tmp; } 1518 } 1519 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1520 } 1521 1522 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1523 lrow = row - rstart; 1524 1525 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1526 if (!v) {pvA = 0; pvB = 0;} 1527 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1528 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1529 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1530 nztot = nzA + nzB; 1531 1532 cmap = mat->garray; 1533 if (v || idx) { 1534 if (nztot) { 1535 /* Sort by increasing column numbers, assuming A and B already sorted */ 1536 PetscInt imark = -1; 1537 if (v) { 1538 *v = v_p = mat->rowvalues; 1539 for (i=0; i<nzB; i++) { 1540 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1541 else break; 1542 } 1543 imark = i; 1544 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1545 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1546 } 1547 if (idx) { 1548 *idx = idx_p = mat->rowindices; 1549 if (imark > -1) { 1550 for (i=0; i<imark; i++) { 1551 idx_p[i] = cmap[cworkB[i]]; 1552 } 1553 } else { 1554 for (i=0; i<nzB; i++) { 1555 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1556 else break; 1557 } 1558 imark = i; 1559 } 1560 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1561 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1562 } 1563 } else { 1564 if (idx) *idx = 0; 1565 if (v) *v = 0; 1566 } 1567 } 1568 *nz = nztot; 1569 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1570 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1571 PetscFunctionReturn(0); 1572 } 1573 1574 #undef __FUNCT__ 1575 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1576 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1577 { 1578 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1579 1580 PetscFunctionBegin; 1581 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must 
be called first"); 1582 aij->getrowactive = PETSC_FALSE; 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatNorm_MPIAIJ" 1588 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1589 { 1590 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1591 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1592 PetscErrorCode ierr; 1593 PetscInt i,j,cstart = mat->cmap->rstart; 1594 PetscReal sum = 0.0; 1595 MatScalar *v; 1596 1597 PetscFunctionBegin; 1598 if (aij->size == 1) { 1599 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1600 } else { 1601 if (type == NORM_FROBENIUS) { 1602 v = amat->a; 1603 for (i=0; i<amat->nz; i++) { 1604 #if defined(PETSC_USE_COMPLEX) 1605 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1606 #else 1607 sum += (*v)*(*v); v++; 1608 #endif 1609 } 1610 v = bmat->a; 1611 for (i=0; i<bmat->nz; i++) { 1612 #if defined(PETSC_USE_COMPLEX) 1613 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1614 #else 1615 sum += (*v)*(*v); v++; 1616 #endif 1617 } 1618 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1619 *norm = sqrt(*norm); 1620 } else if (type == NORM_1) { /* max column norm */ 1621 PetscReal *tmp,*tmp2; 1622 PetscInt *jj,*garray = aij->garray; 1623 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1624 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1625 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1626 *norm = 0.0; 1627 v = amat->a; jj = amat->j; 1628 for (j=0; j<amat->nz; j++) { 1629 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1630 } 1631 v = bmat->a; jj = bmat->j; 1632 for (j=0; j<bmat->nz; j++) { 1633 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1634 } 1635 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1636 for (j=0; j<mat->cmap->N; j++) { 1637 if (tmp2[j] > *norm) *norm = tmp2[j]; 1638 } 1639 ierr = PetscFree(tmp);CHKERRQ(ierr); 1640 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1641 } else if (type == NORM_INFINITY) { /* max row norm */ 1642 PetscReal ntemp = 0.0; 1643 for (j=0; j<aij->A->rmap->n; j++) { 1644 v = amat->a + amat->i[j]; 1645 sum = 0.0; 1646 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1647 sum += PetscAbsScalar(*v); v++; 1648 } 1649 v = bmat->a + bmat->i[j]; 1650 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1651 sum += PetscAbsScalar(*v); v++; 1652 } 1653 if (sum > ntemp) ntemp = sum; 1654 } 1655 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1656 } else { 1657 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1658 } 1659 } 1660 PetscFunctionReturn(0); 1661 } 1662 1663 #undef __FUNCT__ 1664 #define __FUNCT__ "MatTranspose_MPIAIJ" 1665 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1666 { 1667 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1668 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1669 PetscErrorCode ierr; 1670 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1671 PetscInt cstart=A->cmap->rstart,ncol; 1672 Mat B; 1673 MatScalar *array; 1674 1675 PetscFunctionBegin; 1676 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1677 1678 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1679 ai = Aloc->i; aj = Aloc->j; 1680 bi = Bloc->i; bj = Bloc->j; 1681 if (reuse 
== MAT_INITIAL_MATRIX || *matout == A) { 1682 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 1683 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1684 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 1685 for (i=0; i<ai[ma]; i++){ 1686 d_nnz[aj[i]] ++; 1687 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1688 } 1689 1690 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1691 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1692 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1693 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 1694 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1695 } else { 1696 B = *matout; 1697 } 1698 1699 /* copy over the A part */ 1700 array = Aloc->a; 1701 row = A->rmap->rstart; 1702 for (i=0; i<ma; i++) { 1703 ncol = ai[i+1]-ai[i]; 1704 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1705 row++; array += ncol; aj += ncol; 1706 } 1707 aj = Aloc->j; 1708 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1709 1710 /* copy over the B part */ 1711 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1712 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 1713 array = Bloc->a; 1714 row = A->rmap->rstart; 1715 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 1716 cols_tmp = cols; 1717 for (i=0; i<mb; i++) { 1718 ncol = bi[i+1]-bi[i]; 1719 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1720 row++; array += ncol; cols_tmp += ncol; 1721 } 1722 ierr = PetscFree(cols);CHKERRQ(ierr); 1723 1724 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1725 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1726 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1727 *matout = B; 1728 } else { 1729 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1730 } 1731 PetscFunctionReturn(0); 1732 } 1733 1734 #undef __FUNCT__ 1735 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1736 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1737 { 1738 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1739 Mat a = aij->A,b = aij->B; 1740 PetscErrorCode ierr; 1741 PetscInt s1,s2,s3; 1742 1743 PetscFunctionBegin; 1744 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1745 if (rr) { 1746 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1747 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1748 /* Overlap communication with computation. 
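The scatter of rr into the local work vector lvec is started here and completed only after the diagonal block has been scaled below, so the off-process values arrive while the local scaling proceeds.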
*/ 1749 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1750 } 1751 if (ll) { 1752 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1753 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1754 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1755 } 1756 /* scale the diagonal block */ 1757 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1758 1759 if (rr) { 1760 /* Do a scatter end and then right scale the off-diagonal block */ 1761 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1762 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1763 } 1764 1765 PetscFunctionReturn(0); 1766 } 1767 1768 #undef __FUNCT__ 1769 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1770 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1771 { 1772 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1773 PetscErrorCode ierr; 1774 1775 PetscFunctionBegin; 1776 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1777 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1778 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 1779 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 1780 PetscFunctionReturn(0); 1781 } 1782 #undef __FUNCT__ 1783 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1784 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1785 { 1786 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1787 PetscErrorCode ierr; 1788 1789 PetscFunctionBegin; 1790 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1791 PetscFunctionReturn(0); 1792 } 1793 1794 #undef __FUNCT__ 1795 #define __FUNCT__ "MatEqual_MPIAIJ" 1796 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 1797 { 1798 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1799 Mat a,b,c,d; 1800 PetscBool flg; 1801 PetscErrorCode ierr; 1802 1803 PetscFunctionBegin; 1804 a = matA->A; b = matA->B; 1805 c = matB->A; d = matB->B; 1806 1807 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1808 if (flg) { 1809 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1810 } 1811 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1812 PetscFunctionReturn(0); 1813 } 1814 1815 #undef __FUNCT__ 1816 #define __FUNCT__ "MatCopy_MPIAIJ" 1817 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1818 { 1819 PetscErrorCode ierr; 1820 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1821 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1822 1823 PetscFunctionBegin; 1824 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1825 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1826 /* because of the column compression in the off-processor part of the matrix a->B, 1827 the number of columns in a->B and b->B may be different, hence we cannot call 1828 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1829 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1830 then copying the submatrices */ 1831 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1832 } else { 1833 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1834 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1835 } 1836 PetscFunctionReturn(0); 1837 } 1838 1839 #undef __FUNCT__ 1840 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1841 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1842 { 1843 PetscErrorCode ierr; 1844 1845 PetscFunctionBegin; 1846 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1847 PetscFunctionReturn(0); 1848 } 1849 1850 #undef __FUNCT__ 1851 #define __FUNCT__ "MatAXPY_MPIAIJ" 1852 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1853 { 1854 PetscErrorCode ierr; 1855 PetscInt i; 1856 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1857 PetscBLASInt bnz,one=1; 1858 Mat_SeqAIJ *x,*y; 1859 1860 PetscFunctionBegin; 1861 if (str == SAME_NONZERO_PATTERN) { 1862 PetscScalar alpha = a; 1863 x = (Mat_SeqAIJ *)xx->A->data; 1864 y = (Mat_SeqAIJ *)yy->A->data; 1865 bnz = PetscBLASIntCast(x->nz); 1866 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1867 x = (Mat_SeqAIJ *)xx->B->data; 1868 y = (Mat_SeqAIJ *)yy->B->data; 1869 bnz = PetscBLASIntCast(x->nz); 1870 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1871 } else if (str == SUBSET_NONZERO_PATTERN) { 1872 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1873 1874 x = (Mat_SeqAIJ *)xx->B->data; 1875 y = (Mat_SeqAIJ *)yy->B->data; 1876 if (y->xtoy && y->XtoY != xx->B) { 1877 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1878 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1879 } 1880 if (!y->xtoy) { /* get xtoy */ 1881 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1882 y->XtoY = xx->B; 1883 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 1884 } 1885 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1886 } else { 1887 Mat B; 1888 PetscInt *nnz_d,*nnz_o; 1889 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 1890 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 1891 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 1892 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1893 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1894 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 1895 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1896 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 1897 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 1898 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1899 ierr = MatHeaderReplace(Y,B); 1900 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1901 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1902 } 1903 PetscFunctionReturn(0); 1904 } 1905 1906 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1907 1908 #undef __FUNCT__ 1909 #define __FUNCT__ "MatConjugate_MPIAIJ" 1910 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1911 { 1912 #if defined(PETSC_USE_COMPLEX) 1913 PetscErrorCode ierr; 1914 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1915 1916 PetscFunctionBegin; 1917 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1918 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1919 #else 1920 PetscFunctionBegin; 1921 
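/* conjugation is a no-op for real scalars, so there is nothing to do here */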
#endif 1922 PetscFunctionReturn(0); 1923 } 1924 1925 #undef __FUNCT__ 1926 #define __FUNCT__ "MatRealPart_MPIAIJ" 1927 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1928 { 1929 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1930 PetscErrorCode ierr; 1931 1932 PetscFunctionBegin; 1933 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1934 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1935 PetscFunctionReturn(0); 1936 } 1937 1938 #undef __FUNCT__ 1939 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1940 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1941 { 1942 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1943 PetscErrorCode ierr; 1944 1945 PetscFunctionBegin; 1946 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1947 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #ifdef PETSC_HAVE_PBGL 1952 1953 #include <boost/parallel/mpi/bsp_process_group.hpp> 1954 #include <boost/graph/distributed/ilu_default_graph.hpp> 1955 #include <boost/graph/distributed/ilu_0_block.hpp> 1956 #include <boost/graph/distributed/ilu_preconditioner.hpp> 1957 #include <boost/graph/distributed/petsc/interface.hpp> 1958 #include <boost/multi_array.hpp> 1959 #include <boost/parallel/distributed_property_map.hpp> 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 1963 /* 1964 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 1965 */ 1966 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1967 { 1968 namespace petsc = boost::distributed::petsc; 1969 1970 namespace graph_dist = boost::graph::distributed; 1971 using boost::graph::distributed::ilu_default::process_group_type; 1972 using boost::graph::ilu_permuted; 1973 1974 PetscBool row_identity, col_identity; 1975 PetscContainer c; 1976 PetscInt m, n, M, N; 1977 PetscErrorCode ierr; 1978 1979 PetscFunctionBegin; 1980 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 1981 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 1982 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 1983 if (!row_identity || !col_identity) { 1984 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 1985 } 1986 1987 process_group_type pg; 1988 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 1989 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 1990 lgraph_type& level_graph = *lgraph_p; 1991 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 1992 1993 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 1994 ilu_permuted(level_graph); 1995 1996 /* put together the new matrix */ 1997 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 1998 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 1999 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2000 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2001 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2002 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2003 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2004 2005 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 2006 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 2007 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 2008 PetscFunctionReturn(0); 2009 } 2010 2011 #undef __FUNCT__ 2012 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2013 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat 
B,Mat A, const MatFactorInfo *info) 2014 { 2015 PetscFunctionBegin; 2016 PetscFunctionReturn(0); 2017 } 2018 2019 #undef __FUNCT__ 2020 #define __FUNCT__ "MatSolve_MPIAIJ" 2021 /* 2022 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2023 */ 2024 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2025 { 2026 namespace graph_dist = boost::graph::distributed; 2027 2028 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2029 lgraph_type* lgraph_p; 2030 PetscContainer c; 2031 PetscErrorCode ierr; 2032 2033 PetscFunctionBegin; 2034 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2035 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2036 ierr = VecCopy(b, x);CHKERRQ(ierr); 2037 2038 PetscScalar* array_x; 2039 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2040 PetscInt sx; 2041 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2042 2043 PetscScalar* array_b; 2044 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2045 PetscInt sb; 2046 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2047 2048 lgraph_type& level_graph = *lgraph_p; 2049 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2050 2051 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2052 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2053 ref_x(array_x, boost::extents[num_vertices(graph)]); 2054 2055 typedef boost::iterator_property_map<array_ref_type::iterator, 2056 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2057 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2058 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2059 2060 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2061 2062 PetscFunctionReturn(0); 2063 } 2064 #endif 2065 2066 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2067 PetscInt nzlocal,nsends,nrecvs; 2068 PetscMPIInt *send_rank,*recv_rank; 2069 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2070 PetscScalar *sbuf_a,**rbuf_a; 2071 PetscErrorCode (*MatDestroy)(Mat); 2072 } Mat_Redundant; 2073 2074 #undef __FUNCT__ 2075 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2076 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2077 { 2078 PetscErrorCode ierr; 2079 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2080 PetscInt i; 2081 2082 PetscFunctionBegin; 2083 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2084 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2085 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2086 for (i=0; i<redund->nrecvs; i++){ 2087 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2088 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2089 } 2090 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2091 ierr = PetscFree(redund);CHKERRQ(ierr); 2092 PetscFunctionReturn(0); 2093 } 2094 2095 #undef __FUNCT__ 2096 #define __FUNCT__ "MatDestroy_MatRedundant" 2097 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2098 { 2099 PetscErrorCode ierr; 2100 PetscContainer container; 2101 Mat_Redundant *redund=PETSC_NULL; 2102 2103 PetscFunctionBegin; 2104 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2105 if (container) { 2106 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2107 } else { 2108 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2109 } 2110 A->ops->destroy = redund->MatDestroy; 2111 
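/* restore the original destroy routine, detach the container from the matrix, destroy the matrix, and finally free the container itself */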
ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2112 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2113 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 2114 PetscFunctionReturn(0); 2115 } 2116 2117 #undef __FUNCT__ 2118 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2119 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2120 { 2121 PetscMPIInt rank,size; 2122 MPI_Comm comm=((PetscObject)mat)->comm; 2123 PetscErrorCode ierr; 2124 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2125 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2126 PetscInt *rowrange=mat->rmap->range; 2127 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2128 Mat A=aij->A,B=aij->B,C=*matredundant; 2129 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2130 PetscScalar *sbuf_a; 2131 PetscInt nzlocal=a->nz+b->nz; 2132 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2133 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2134 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2135 MatScalar *aworkA,*aworkB; 2136 PetscScalar *vals; 2137 PetscMPIInt tag1,tag2,tag3,imdex; 2138 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2139 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2140 MPI_Status recv_status,*send_status; 2141 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2142 PetscInt **rbuf_j=PETSC_NULL; 2143 PetscScalar **rbuf_a=PETSC_NULL; 2144 Mat_Redundant *redund=PETSC_NULL; 2145 PetscContainer container; 2146 2147 PetscFunctionBegin; 2148 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2149 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2150 2151 if (reuse == MAT_REUSE_MATRIX) { 2152 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2153 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2154 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2155 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2156 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2157 if (container) { 2158 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2159 } else { 2160 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2161 } 2162 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2163 2164 nsends = redund->nsends; 2165 nrecvs = redund->nrecvs; 2166 send_rank = redund->send_rank; 2167 recv_rank = redund->recv_rank; 2168 sbuf_nz = redund->sbuf_nz; 2169 rbuf_nz = redund->rbuf_nz; 2170 sbuf_j = redund->sbuf_j; 2171 sbuf_a = redund->sbuf_a; 2172 rbuf_j = redund->rbuf_j; 2173 rbuf_a = redund->rbuf_a; 2174 } 2175 2176 if (reuse == MAT_INITIAL_MATRIX){ 2177 PetscMPIInt subrank,subsize; 2178 PetscInt nleftover,np_subcomm; 2179 /* get the destination processors' id send_rank, nsends and nrecvs */ 2180 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2181 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2182 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2183 np_subcomm = size/nsubcomm; 2184 nleftover = size - nsubcomm*np_subcomm; 2185 nsends = 0; nrecvs = 0; 2186 for (i=0; i<size; i++){ /* i=rank*/ 2187 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2188 send_rank[nsends] = i; nsends++; 2189 recv_rank[nrecvs++] = i; 2190 } 2191 } 2192 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2193 i = size-nleftover-1; 2194 j = 0; 2195 while (j < nsubcomm - nleftover){ 2196 send_rank[nsends++] = i; 2197 i--; j++; 2198 } 2199 } 2200 2201 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2202 for (i=0; i<nleftover; i++){ 2203 recv_rank[nrecvs++] = size-nleftover+i; 2204 } 2205 } 2206 2207 /* allocate sbuf_j, sbuf_a */ 2208 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2209 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2210 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2211 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2212 2213 /* copy mat's local entries into the buffers */ 2214 if (reuse == MAT_INITIAL_MATRIX){ 2215 rownz_max = 0; 2216 rptr = sbuf_j; 2217 cols = sbuf_j + rend-rstart + 1; 2218 vals = sbuf_a; 2219 rptr[0] = 0; 2220 for (i=0; i<rend-rstart; i++){ 2221 row = i + rstart; 2222 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2223 ncols = nzA + nzB; 2224 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2225 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2226 /* load the column indices for this row into cols */ 2227 lwrite = 0; 2228 for (l=0; l<nzB; l++) { 2229 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2230 vals[lwrite] = aworkB[l]; 2231 cols[lwrite++] = ctmp; 2232 } 2233 } 2234 for (l=0; l<nzA; l++){ 2235 vals[lwrite] = aworkA[l]; 2236 cols[lwrite++] = cstart + cworkA[l]; 2237 } 2238 for (l=0; l<nzB; l++) { 2239 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2240 vals[lwrite] = aworkB[l]; 2241 cols[lwrite++] = ctmp; 2242 } 2243 } 2244 vals += ncols; 2245 cols += ncols; 2246 rptr[i+1] = rptr[i] + ncols; 2247 if (rownz_max < ncols) rownz_max = ncols; 2248 } 2249 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2250 } else { /* only copy matrix values into sbuf_a */ 2251 rptr = sbuf_j; 2252 vals = sbuf_a; 2253 rptr[0] = 0; 2254 for (i=0; i<rend-rstart; i++){ 2255 row = i + rstart; 2256 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2257 ncols = nzA + nzB; 2258 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2259 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2260 lwrite = 0; 2261 for (l=0; l<nzB; l++) { 2262 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2263 } 2264 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2265 for 
(l=0; l<nzB; l++) { 2266 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2267 } 2268 vals += ncols; 2269 rptr[i+1] = rptr[i] + ncols; 2270 } 2271 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2272 2273 /* send nzlocal to others, and recv other's nzlocal */ 2274 /*--------------------------------------------------*/ 2275 if (reuse == MAT_INITIAL_MATRIX){ 2276 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2277 s_waits2 = s_waits3 + nsends; 2278 s_waits1 = s_waits2 + nsends; 2279 r_waits1 = s_waits1 + nsends; 2280 r_waits2 = r_waits1 + nrecvs; 2281 r_waits3 = r_waits2 + nrecvs; 2282 } else { 2283 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2284 r_waits3 = s_waits3 + nsends; 2285 } 2286 2287 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2288 if (reuse == MAT_INITIAL_MATRIX){ 2289 /* get new tags to keep the communication clean */ 2290 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2291 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2292 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2293 2294 /* post receives of other's nzlocal */ 2295 for (i=0; i<nrecvs; i++){ 2296 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2297 } 2298 /* send nzlocal to others */ 2299 for (i=0; i<nsends; i++){ 2300 sbuf_nz[i] = nzlocal; 2301 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2302 } 2303 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2304 count = nrecvs; 2305 while (count) { 2306 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2307 recv_rank[imdex] = recv_status.MPI_SOURCE; 2308 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2309 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2310 2311 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2312 rbuf_nz[imdex] += i + 2; 2313 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2314 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2315 count--; 2316 } 2317 /* wait on sends of nzlocal */ 2318 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2319 /* send mat->i,j to others, and recv from other's */ 2320 /*------------------------------------------------*/ 2321 for (i=0; i<nsends; i++){ 2322 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2323 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2324 } 2325 /* wait on receives of mat->i,j */ 2326 /*------------------------------*/ 2327 count = nrecvs; 2328 while (count) { 2329 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2330 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2331 count--; 2332 } 2333 /* wait on sends of mat->i,j */ 2334 /*---------------------------*/ 2335 if (nsends) { 2336 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2337 } 2338 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2339 2340 /* post receives, send and receive mat->a */ 2341 
/*----------------------------------------*/ 2342 for (imdex=0; imdex<nrecvs; imdex++) { 2343 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2344 } 2345 for (i=0; i<nsends; i++){ 2346 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2347 } 2348 count = nrecvs; 2349 while (count) { 2350 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2351 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2352 count--; 2353 } 2354 if (nsends) { 2355 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2356 } 2357 2358 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2359 2360 /* create redundant matrix */ 2361 /*-------------------------*/ 2362 if (reuse == MAT_INITIAL_MATRIX){ 2363 /* compute rownz_max for preallocation */ 2364 for (imdex=0; imdex<nrecvs; imdex++){ 2365 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2366 rptr = rbuf_j[imdex]; 2367 for (i=0; i<j; i++){ 2368 ncols = rptr[i+1] - rptr[i]; 2369 if (rownz_max < ncols) rownz_max = ncols; 2370 } 2371 } 2372 2373 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2374 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2375 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2376 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2377 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2378 } else { 2379 C = *matredundant; 2380 } 2381 2382 /* insert local matrix entries */ 2383 rptr = sbuf_j; 2384 cols = sbuf_j + rend-rstart + 1; 2385 vals = sbuf_a; 2386 for (i=0; i<rend-rstart; i++){ 2387 row = i + rstart; 2388 ncols = rptr[i+1] - rptr[i]; 2389 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2390 vals += ncols; 2391 cols += ncols; 2392 } 2393 /* insert received matrix entries */ 2394 for (imdex=0; imdex<nrecvs; imdex++){ 2395 rstart = rowrange[recv_rank[imdex]]; 2396 rend = rowrange[recv_rank[imdex]+1]; 2397 rptr = rbuf_j[imdex]; 2398 cols = rbuf_j[imdex] + rend-rstart + 1; 2399 vals = rbuf_a[imdex]; 2400 for (i=0; i<rend-rstart; i++){ 2401 row = i + rstart; 2402 ncols = rptr[i+1] - rptr[i]; 2403 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2404 vals += ncols; 2405 cols += ncols; 2406 } 2407 } 2408 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2409 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2410 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2411 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2412 if (reuse == MAT_INITIAL_MATRIX){ 2413 PetscContainer container; 2414 *matredundant = C; 2415 /* create a supporting struct and attach it to C for reuse */ 2416 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2417 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2418 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2419 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2420 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2421 2422 redund->nzlocal = nzlocal; 2423 redund->nsends = nsends; 2424 redund->nrecvs = nrecvs; 2425 redund->send_rank = send_rank; 2426 redund->recv_rank = recv_rank; 
2427 redund->sbuf_nz = sbuf_nz; 2428 redund->rbuf_nz = rbuf_nz; 2429 redund->sbuf_j = sbuf_j; 2430 redund->sbuf_a = sbuf_a; 2431 redund->rbuf_j = rbuf_j; 2432 redund->rbuf_a = rbuf_a; 2433 2434 redund->MatDestroy = C->ops->destroy; 2435 C->ops->destroy = MatDestroy_MatRedundant; 2436 } 2437 PetscFunctionReturn(0); 2438 } 2439 2440 #undef __FUNCT__ 2441 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2442 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2443 { 2444 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2445 PetscErrorCode ierr; 2446 PetscInt i,*idxb = 0; 2447 PetscScalar *va,*vb; 2448 Vec vtmp; 2449 2450 PetscFunctionBegin; 2451 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2452 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2453 if (idx) { 2454 for (i=0; i<A->rmap->n; i++) { 2455 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2456 } 2457 } 2458 2459 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2460 if (idx) { 2461 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2462 } 2463 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2464 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2465 2466 for (i=0; i<A->rmap->n; i++){ 2467 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2468 va[i] = vb[i]; 2469 if (idx) idx[i] = a->garray[idxb[i]]; 2470 } 2471 } 2472 2473 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2474 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2475 if (idxb) { 2476 ierr = PetscFree(idxb);CHKERRQ(ierr); 2477 } 2478 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2479 PetscFunctionReturn(0); 2480 } 2481 2482 #undef __FUNCT__ 2483 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2484 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2485 { 2486 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2487 PetscErrorCode ierr; 2488 PetscInt i,*idxb = 0; 2489 PetscScalar *va,*vb; 2490 Vec vtmp; 2491 2492 PetscFunctionBegin; 2493 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2494 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2495 if (idx) { 2496 for (i=0; i<A->rmap->n; i++) { 2497 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2498 } 2499 } 2500 2501 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2502 if (idx) { 2503 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2504 } 2505 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2506 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2507 2508 for (i=0; i<A->rmap->n; i++){ 2509 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2510 va[i] = vb[i]; 2511 if (idx) idx[i] = a->garray[idxb[i]]; 2512 } 2513 } 2514 2515 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2516 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2517 if (idxb) { 2518 ierr = PetscFree(idxb);CHKERRQ(ierr); 2519 } 2520 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 2521 PetscFunctionReturn(0); 2522 } 2523 2524 #undef __FUNCT__ 2525 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2526 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2527 { 2528 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2529 PetscInt n = A->rmap->n; 2530 PetscInt cstart = A->cmap->rstart; 2531 PetscInt *cmap = mat->garray; 2532 PetscInt *diagIdx, *offdiagIdx; 2533 Vec diagV, offdiagV; 2534 PetscScalar *a, *diagA, *offdiagA; 2535 PetscInt r; 2536 PetscErrorCode ierr; 2537 2538 PetscFunctionBegin; 2539 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2540 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2541 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2542 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2543 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2544 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2545 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2546 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2547 for(r = 0; r < n; ++r) { 2548 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2549 a[r] = diagA[r]; 2550 idx[r] = cstart + diagIdx[r]; 2551 } else { 2552 a[r] = offdiagA[r]; 2553 idx[r] = cmap[offdiagIdx[r]]; 2554 } 2555 } 2556 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2557 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2558 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2559 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2560 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2561 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2562 PetscFunctionReturn(0); 2563 } 2564 2565 #undef __FUNCT__ 2566 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2567 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2568 { 2569 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2570 PetscInt n = A->rmap->n; 2571 PetscInt cstart = A->cmap->rstart; 2572 PetscInt *cmap = mat->garray; 2573 PetscInt *diagIdx, *offdiagIdx; 2574 Vec diagV, offdiagV; 2575 PetscScalar *a, *diagA, *offdiagA; 2576 PetscInt r; 2577 PetscErrorCode ierr; 2578 2579 PetscFunctionBegin; 2580 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2581 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2582 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2583 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2584 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2585 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2586 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2587 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2588 for(r = 0; r < n; ++r) { 2589 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2590 a[r] = diagA[r]; 2591 idx[r] = cstart + diagIdx[r]; 2592 } else { 2593 a[r] = offdiagA[r]; 2594 idx[r] = cmap[offdiagIdx[r]]; 2595 } 2596 } 2597 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2598 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2599 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2600 ierr = VecDestroy(diagV);CHKERRQ(ierr); 2601 ierr = VecDestroy(offdiagV);CHKERRQ(ierr); 2602 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2603 PetscFunctionReturn(0); 2604 } 2605 2606 #undef __FUNCT__ 2607 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2608 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2609 { 2610 PetscErrorCode ierr; 2611 Mat *dummy; 2612 2613 PetscFunctionBegin; 2614 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2615 *newmat = *dummy; 2616 ierr = PetscFree(dummy);CHKERRQ(ierr); 2617 PetscFunctionReturn(0); 2618 } 2619 2620 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2621 /* -------------------------------------------------------------------*/ 2622 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2623 MatGetRow_MPIAIJ, 2624 MatRestoreRow_MPIAIJ, 2625 MatMult_MPIAIJ, 2626 /* 4*/ MatMultAdd_MPIAIJ, 2627 MatMultTranspose_MPIAIJ, 2628 MatMultTransposeAdd_MPIAIJ, 2629 #ifdef PETSC_HAVE_PBGL 2630 MatSolve_MPIAIJ, 2631 #else 2632 0, 2633 #endif 2634 0, 2635 0, 2636 /*10*/ 0, 2637 0, 2638 0, 2639 
MatSOR_MPIAIJ, 2640 MatTranspose_MPIAIJ, 2641 /*15*/ MatGetInfo_MPIAIJ, 2642 MatEqual_MPIAIJ, 2643 MatGetDiagonal_MPIAIJ, 2644 MatDiagonalScale_MPIAIJ, 2645 MatNorm_MPIAIJ, 2646 /*20*/ MatAssemblyBegin_MPIAIJ, 2647 MatAssemblyEnd_MPIAIJ, 2648 MatSetOption_MPIAIJ, 2649 MatZeroEntries_MPIAIJ, 2650 /*24*/ MatZeroRows_MPIAIJ, 2651 0, 2652 #ifdef PETSC_HAVE_PBGL 2653 0, 2654 #else 2655 0, 2656 #endif 2657 0, 2658 0, 2659 /*29*/ MatSetUpPreallocation_MPIAIJ, 2660 #ifdef PETSC_HAVE_PBGL 2661 0, 2662 #else 2663 0, 2664 #endif 2665 0, 2666 0, 2667 0, 2668 /*34*/ MatDuplicate_MPIAIJ, 2669 0, 2670 0, 2671 0, 2672 0, 2673 /*39*/ MatAXPY_MPIAIJ, 2674 MatGetSubMatrices_MPIAIJ, 2675 MatIncreaseOverlap_MPIAIJ, 2676 MatGetValues_MPIAIJ, 2677 MatCopy_MPIAIJ, 2678 /*44*/ MatGetRowMax_MPIAIJ, 2679 MatScale_MPIAIJ, 2680 0, 2681 0, 2682 0, 2683 /*49*/ MatSetBlockSize_MPIAIJ, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /*54*/ MatFDColoringCreate_MPIAIJ, 2689 0, 2690 MatSetUnfactored_MPIAIJ, 2691 MatPermute_MPIAIJ, 2692 0, 2693 /*59*/ MatGetSubMatrix_MPIAIJ, 2694 MatDestroy_MPIAIJ, 2695 MatView_MPIAIJ, 2696 0, 2697 0, 2698 /*64*/ 0, 2699 0, 2700 0, 2701 0, 2702 0, 2703 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2704 MatGetRowMinAbs_MPIAIJ, 2705 0, 2706 MatSetColoring_MPIAIJ, 2707 #if defined(PETSC_HAVE_ADIC) 2708 MatSetValuesAdic_MPIAIJ, 2709 #else 2710 0, 2711 #endif 2712 MatSetValuesAdifor_MPIAIJ, 2713 /*75*/ MatFDColoringApply_AIJ, 2714 0, 2715 0, 2716 0, 2717 0, 2718 /*80*/ 0, 2719 0, 2720 0, 2721 /*83*/ MatLoad_MPIAIJ, 2722 0, 2723 0, 2724 0, 2725 0, 2726 0, 2727 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2728 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2729 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2730 MatPtAP_Basic, 2731 MatPtAPSymbolic_MPIAIJ, 2732 /*94*/ MatPtAPNumeric_MPIAIJ, 2733 0, 2734 0, 2735 0, 2736 0, 2737 /*99*/ 0, 2738 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2739 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2740 MatConjugate_MPIAIJ, 2741 0, 2742 /*104*/MatSetValuesRow_MPIAIJ, 2743 MatRealPart_MPIAIJ, 2744 MatImaginaryPart_MPIAIJ, 2745 0, 2746 0, 2747 /*109*/0, 2748 MatGetRedundantMatrix_MPIAIJ, 2749 MatGetRowMin_MPIAIJ, 2750 0, 2751 0, 2752 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 2753 0, 2754 0, 2755 0, 2756 0, 2757 /*119*/0, 2758 0, 2759 0, 2760 0, 2761 MatGetMultiProcBlock_MPIAIJ 2762 }; 2763 2764 /* ----------------------------------------------------------------------------------------*/ 2765 2766 EXTERN_C_BEGIN 2767 #undef __FUNCT__ 2768 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2769 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 2770 { 2771 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2772 PetscErrorCode ierr; 2773 2774 PetscFunctionBegin; 2775 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2776 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2777 PetscFunctionReturn(0); 2778 } 2779 EXTERN_C_END 2780 2781 EXTERN_C_BEGIN 2782 #undef __FUNCT__ 2783 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2784 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 2785 { 2786 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2787 PetscErrorCode ierr; 2788 2789 PetscFunctionBegin; 2790 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2791 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2792 PetscFunctionReturn(0); 2793 } 2794 EXTERN_C_END 2795 2796 EXTERN_C_BEGIN 2797 #undef __FUNCT__ 2798 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2799 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2800 { 2801 Mat_MPIAIJ *b; 2802 PetscErrorCode ierr; 2803 PetscInt 
i; 2804 2805 PetscFunctionBegin; 2806 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2807 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2808 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2809 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2810 2811 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 2812 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 2813 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2814 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2815 if (d_nnz) { 2816 for (i=0; i<B->rmap->n; i++) { 2817 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2818 } 2819 } 2820 if (o_nnz) { 2821 for (i=0; i<B->rmap->n; i++) { 2822 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2823 } 2824 } 2825 b = (Mat_MPIAIJ*)B->data; 2826 2827 if (!B->preallocated) { 2828 /* Explicitly create 2 MATSEQAIJ matrices. */ 2829 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2830 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2831 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2832 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2833 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2834 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2835 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2836 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2837 } 2838 2839 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2840 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2841 B->preallocated = PETSC_TRUE; 2842 PetscFunctionReturn(0); 2843 } 2844 EXTERN_C_END 2845 2846 #undef __FUNCT__ 2847 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2848 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2849 { 2850 Mat mat; 2851 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2852 PetscErrorCode ierr; 2853 2854 PetscFunctionBegin; 2855 *newmat = 0; 2856 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2857 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2858 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2859 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2860 a = (Mat_MPIAIJ*)mat->data; 2861 2862 mat->factortype = matin->factortype; 2863 mat->rmap->bs = matin->rmap->bs; 2864 mat->assembled = PETSC_TRUE; 2865 mat->insertmode = NOT_SET_VALUES; 2866 mat->preallocated = PETSC_TRUE; 2867 2868 a->size = oldmat->size; 2869 a->rank = oldmat->rank; 2870 a->donotstash = oldmat->donotstash; 2871 a->roworiented = oldmat->roworiented; 2872 a->rowindices = 0; 2873 a->rowvalues = 0; 2874 a->getrowactive = PETSC_FALSE; 2875 2876 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2877 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2878 2879 if (oldmat->colmap) { 2880 #if defined (PETSC_USE_CTABLE) 2881 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2882 #else 2883 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2884 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2885 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2886 #endif 2887 } else a->colmap = 0; 2888 if (oldmat->garray) { 2889 PetscInt len; 2890 len = oldmat->B->cmap->n; 2891 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2892 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2893 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2894 } else a->garray = 0; 2895 2896 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2897 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2898 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2899 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2900 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2901 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2902 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2903 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2904 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2905 *newmat = mat; 2906 PetscFunctionReturn(0); 2907 } 2908 2909 /* 2910 Allows sending/receiving messages larger than 2 gigabytes in a single call 2911 */ 2912 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm) 2913 { 2914 int ierr; 2915 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 2916 PetscInt i,numchunks; 2917 PetscMPIInt icnt; 2918 2919 numchunks = cnt/CHUNKSIZE + 1; 2920 for (i=0; i<numchunks; i++) { 2921 icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 2922 ierr = MPI_Send(mess,icnt,type,to,tag,comm); 2923 if (type == MPIU_INT) { 2924 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 2925 } else if (type == MPIU_SCALAR) { 2926 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 2927 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 2928 } 2929 return 0; 2930 } 2931 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm) 2932 { 2933 int ierr; 2934 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 2935 MPI_Status status; 2936 PetscInt i,numchunks; 2937 PetscMPIInt icnt; 2938 2939 numchunks = cnt/CHUNKSIZE + 1; 2940 for (i=0; i<numchunks; i++) { 2941 icnt = PetscMPIIntCast((i < numchunks-1) ? 
CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 2942 ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status); 2943 if (type == MPIU_INT) { 2944 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 2945 } else if (type == MPIU_SCALAR) { 2946 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 2947 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 2948 } 2949 return 0; 2950 } 2951 2952 #undef __FUNCT__ 2953 #define __FUNCT__ "MatLoad_MPIAIJ" 2954 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2955 { 2956 PetscScalar *vals,*svals; 2957 MPI_Comm comm = ((PetscObject)viewer)->comm; 2958 PetscErrorCode ierr; 2959 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2960 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 2961 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2962 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 2963 PetscInt cend,cstart,n,*rowners,sizesset=1; 2964 int fd; 2965 2966 PetscFunctionBegin; 2967 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2968 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2969 if (!rank) { 2970 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2971 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2972 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2973 } 2974 2975 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 2976 2977 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2978 M = header[1]; N = header[2]; 2979 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2980 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 2981 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 2982 2983 /* If global sizes are set, check if they are consistent with that given in the file */ 2984 if (sizesset) { 2985 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 2986 } 2987 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2988 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2989 2990 /* determine ownership of all rows */ 2991 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 2992 else m = newMat->rmap->n; /* Set by user */ 2993 2994 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2995 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2996 2997 /* First process needs enough room for process with most rows */ 2998 if (!rank) { 2999 mmax = rowners[1]; 3000 for (i=2; i<size; i++) { 3001 mmax = PetscMax(mmax,rowners[i]); 3002 } 3003 } else mmax = m; 3004 3005 rowners[0] = 0; 3006 for (i=2; i<=size; i++) { 3007 rowners[i] += rowners[i-1]; 3008 } 3009 rstart = rowners[rank]; 3010 rend = rowners[rank+1]; 3011 3012 /* distribute row lengths to all processors */ 3013 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3014 if (!rank) { 3015 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3016 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3017 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3018 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3019 for (j=0; 
j<m; j++) { 3020 procsnz[0] += ourlens[j]; 3021 } 3022 for (i=1; i<size; i++) { 3023 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3024 /* calculate the number of nonzeros on each processor */ 3025 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3026 procsnz[i] += rowlengths[j]; 3027 } 3028 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3029 } 3030 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3031 } else { 3032 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3033 } 3034 3035 if (!rank) { 3036 /* determine max buffer needed and allocate it */ 3037 maxnz = 0; 3038 for (i=0; i<size; i++) { 3039 maxnz = PetscMax(maxnz,procsnz[i]); 3040 } 3041 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3042 3043 /* read in my part of the matrix column indices */ 3044 nz = procsnz[0]; 3045 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3046 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3047 3048 /* read in every one elses and ship off */ 3049 for (i=1; i<size; i++) { 3050 nz = procsnz[i]; 3051 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3052 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3053 } 3054 ierr = PetscFree(cols);CHKERRQ(ierr); 3055 } else { 3056 /* determine buffer space needed for message */ 3057 nz = 0; 3058 for (i=0; i<m; i++) { 3059 nz += ourlens[i]; 3060 } 3061 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3062 3063 /* receive message of column indices*/ 3064 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3065 } 3066 3067 /* determine column ownership if matrix is not square */ 3068 if (N != M) { 3069 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3070 else n = newMat->cmap->n; 3071 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3072 cstart = cend - n; 3073 } else { 3074 cstart = rstart; 3075 cend = rend; 3076 n = cend - cstart; 3077 } 3078 3079 /* loop over local rows, determining number of off diagonal entries */ 3080 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3081 jj = 0; 3082 for (i=0; i<m; i++) { 3083 for (j=0; j<ourlens[i]; j++) { 3084 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3085 jj++; 3086 } 3087 } 3088 3089 for (i=0; i<m; i++) { 3090 ourlens[i] -= offlens[i]; 3091 } 3092 if (!sizesset) { 3093 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3094 } 3095 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3096 3097 for (i=0; i<m; i++) { 3098 ourlens[i] += offlens[i]; 3099 } 3100 3101 if (!rank) { 3102 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3103 3104 /* read in my part of the matrix numerical values */ 3105 nz = procsnz[0]; 3106 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3107 3108 /* insert into matrix */ 3109 jj = rstart; 3110 smycols = mycols; 3111 svals = vals; 3112 for (i=0; i<m; i++) { 3113 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3114 smycols += ourlens[i]; 3115 svals += ourlens[i]; 3116 jj++; 3117 } 3118 3119 /* read in other processors and ship out */ 3120 for (i=1; i<size; i++) { 3121 nz = procsnz[i]; 3122 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3123 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3124 } 3125 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3126 } else { 3127 /* receive numeric values */ 3128 ierr 
= PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3129 3130 /* receive message of values*/ 3131 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3132 3133 /* insert into matrix */ 3134 jj = rstart; 3135 smycols = mycols; 3136 svals = vals; 3137 for (i=0; i<m; i++) { 3138 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3139 smycols += ourlens[i]; 3140 svals += ourlens[i]; 3141 jj++; 3142 } 3143 } 3144 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3145 ierr = PetscFree(vals);CHKERRQ(ierr); 3146 ierr = PetscFree(mycols);CHKERRQ(ierr); 3147 ierr = PetscFree(rowners);CHKERRQ(ierr); 3148 3149 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3150 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3151 PetscFunctionReturn(0); 3152 } 3153 3154 #undef __FUNCT__ 3155 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3156 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3157 { 3158 PetscErrorCode ierr; 3159 IS iscol_local; 3160 PetscInt csize; 3161 3162 PetscFunctionBegin; 3163 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3164 if (call == MAT_REUSE_MATRIX) { 3165 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3166 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3167 } else { 3168 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3169 } 3170 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3171 if (call == MAT_INITIAL_MATRIX) { 3172 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3173 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 3174 } 3175 PetscFunctionReturn(0); 3176 } 3177 3178 #undef __FUNCT__ 3179 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3180 /* 3181 Not great since it makes two copies of the submatrix, first an SeqAIJ 3182 in local and then by concatenating the local matrices the end result. 3183 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3184 3185 Note: This requires a sequential iscol with all indices. 
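   A minimal sketch of the call sequence used by MatGetSubMatrix_MPIAIJ() above to drive this helper
   (gather the column IS on every process, then delegate; destruction/composition details omitted):

     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
     ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);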
3186 */ 3187 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3188 { 3189 PetscErrorCode ierr; 3190 PetscMPIInt rank,size; 3191 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3192 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3193 Mat *local,M,Mreuse; 3194 MatScalar *vwork,*aa; 3195 MPI_Comm comm = ((PetscObject)mat)->comm; 3196 Mat_SeqAIJ *aij; 3197 3198 3199 PetscFunctionBegin; 3200 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3201 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3202 3203 if (call == MAT_REUSE_MATRIX) { 3204 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3205 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3206 local = &Mreuse; 3207 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3208 } else { 3209 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3210 Mreuse = *local; 3211 ierr = PetscFree(local);CHKERRQ(ierr); 3212 } 3213 3214 /* 3215 m - number of local rows 3216 n - number of columns (same on all processors) 3217 rstart - first row in new global matrix generated 3218 */ 3219 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3220 if (call == MAT_INITIAL_MATRIX) { 3221 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3222 ii = aij->i; 3223 jj = aij->j; 3224 3225 /* 3226 Determine the number of non-zeros in the diagonal and off-diagonal 3227 portions of the matrix in order to do correct preallocation 3228 */ 3229 3230 /* first get start and end of "diagonal" columns */ 3231 if (csize == PETSC_DECIDE) { 3232 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3233 if (mglobal == n) { /* square matrix */ 3234 nlocal = m; 3235 } else { 3236 nlocal = n/size + ((n % size) > rank); 3237 } 3238 } else { 3239 nlocal = csize; 3240 } 3241 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3242 rstart = rend - nlocal; 3243 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3244 3245 /* next, compute all the lengths */ 3246 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3247 olens = dlens + m; 3248 for (i=0; i<m; i++) { 3249 jend = ii[i+1] - ii[i]; 3250 olen = 0; 3251 dlen = 0; 3252 for (j=0; j<jend; j++) { 3253 if (*jj < rstart || *jj >= rend) olen++; 3254 else dlen++; 3255 jj++; 3256 } 3257 olens[i] = olen; 3258 dlens[i] = dlen; 3259 } 3260 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3261 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3262 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3263 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3264 ierr = PetscFree(dlens);CHKERRQ(ierr); 3265 } else { 3266 PetscInt ml,nl; 3267 3268 M = *newmat; 3269 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3270 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3271 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3272 /* 3273 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3274 rather than the slower MatSetValues(). 
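     (Flagging the matrix as previously assembled but currently unassembled lets the values be
      inserted again into the already-preallocated nonzero pattern without a fresh assembly setup.)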
3275 */ 3276 M->was_assembled = PETSC_TRUE; 3277 M->assembled = PETSC_FALSE; 3278 } 3279 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3280 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3281 ii = aij->i; 3282 jj = aij->j; 3283 aa = aij->a; 3284 for (i=0; i<m; i++) { 3285 row = rstart + i; 3286 nz = ii[i+1] - ii[i]; 3287 cwork = jj; jj += nz; 3288 vwork = aa; aa += nz; 3289 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3290 } 3291 3292 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3293 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3294 *newmat = M; 3295 3296 /* save submatrix used in processor for next request */ 3297 if (call == MAT_INITIAL_MATRIX) { 3298 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3299 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 3300 } 3301 3302 PetscFunctionReturn(0); 3303 } 3304 3305 EXTERN_C_BEGIN 3306 #undef __FUNCT__ 3307 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3308 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3309 { 3310 PetscInt m,cstart, cend,j,nnz,i,d; 3311 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3312 const PetscInt *JJ; 3313 PetscScalar *values; 3314 PetscErrorCode ierr; 3315 3316 PetscFunctionBegin; 3317 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3318 3319 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3320 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3321 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3322 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3323 m = B->rmap->n; 3324 cstart = B->cmap->rstart; 3325 cend = B->cmap->rend; 3326 rstart = B->rmap->rstart; 3327 3328 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3329 3330 #if defined(PETSC_USE_DEBUG) 3331 for (i=0; i<m; i++) { 3332 nnz = Ii[i+1]- Ii[i]; 3333 JJ = J + Ii[i]; 3334 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3335 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i); 3336 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3337 } 3338 #endif 3339 3340 for (i=0; i<m; i++) { 3341 nnz = Ii[i+1]- Ii[i]; 3342 JJ = J + Ii[i]; 3343 nnz_max = PetscMax(nnz_max,nnz); 3344 d = 0; 3345 for (j=0; j<nnz; j++) { 3346 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3347 } 3348 d_nnz[i] = d; 3349 o_nnz[i] = nnz - d; 3350 } 3351 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3352 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3353 3354 if (v) values = (PetscScalar*)v; 3355 else { 3356 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3357 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3358 } 3359 3360 for (i=0; i<m; i++) { 3361 ii = i + rstart; 3362 nnz = Ii[i+1]- Ii[i]; 3363 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3364 } 3365 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3366 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3367 3368 if (!v) { 3369 ierr = PetscFree(values);CHKERRQ(ierr); 3370 } 3371 PetscFunctionReturn(0); 3372 } 3373 EXTERN_C_END 3374 3375 #undef __FUNCT__ 3376 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3377 /*@ 3378 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3379 (the default parallel PETSc format). 3380 3381 Collective on MPI_Comm 3382 3383 Input Parameters: 3384 + B - the matrix 3385 . i - the indices into j for the start of each local row (starts with zero) 3386 . j - the column indices for each local row (starts with zero) 3387 - v - optional values in the matrix 3388 3389 Level: developer 3390 3391 Notes: 3392 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3393 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3394 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3395 3396 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3397 3398 The format which is used for the sparse matrix input, is equivalent to a 3399 row-major ordering.. i.e for the following matrix, the input data expected is 3400 as shown: 3401 3402 1 0 0 3403 2 0 3 P0 3404 ------- 3405 4 5 6 P1 3406 3407 Process0 [P0]: rows_owned=[0,1] 3408 i = {0,1,3} [size = nrow+1 = 2+1] 3409 j = {0,0,2} [size = nz = 6] 3410 v = {1,2,3} [size = nz = 6] 3411 3412 Process1 [P1]: rows_owned=[2] 3413 i = {0,3} [size = nrow+1 = 1+1] 3414 j = {0,1,2} [size = nz = 6] 3415 v = {4,5,6} [size = nz = 6] 3416 3417 .keywords: matrix, aij, compressed row, sparse, parallel 3418 3419 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3420 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3421 @*/ 3422 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3423 { 3424 PetscErrorCode ierr; 3425 3426 PetscFunctionBegin; 3427 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3428 PetscFunctionReturn(0); 3429 } 3430 3431 #undef __FUNCT__ 3432 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3433 /*@C 3434 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3435 (the default parallel PETSc format). For good matrix assembly performance 3436 the user should preallocate the matrix storage by setting the parameters 3437 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3438 performance can be increased by more than a factor of 50. 3439 3440 Collective on MPI_Comm 3441 3442 Input Parameters: 3443 + A - the matrix 3444 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3445 (same value is used for all local rows) 3446 . d_nnz - array containing the number of nonzeros in the various rows of the 3447 DIAGONAL portion of the local submatrix (possibly different for each row) 3448 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3449 The size of this array is equal to the number of local rows, i.e 'm'. 3450 You must leave room for the diagonal entry even if it is zero. 3451 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3452 submatrix (same value is used for all local rows). 3453 - o_nnz - array containing the number of nonzeros in the various rows of the 3454 OFF-DIAGONAL portion of the local submatrix (possibly different for 3455 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3456 structure. The size of this array is equal to the number 3457 of local rows, i.e 'm'. 3458 3459 If the *_nnz parameter is given then the *_nz parameter is ignored 3460 3461 The AIJ format (also called the Yale sparse matrix format or 3462 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3463 storage. The stored row and column indices begin with zero. 3464 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3465 3466 The parallel matrix is partitioned such that the first m0 rows belong to 3467 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3468 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3469 3470 The DIAGONAL portion of the local submatrix of a processor can be defined 3471 as the submatrix which is obtained by extraction the part corresponding to 3472 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3473 first row that belongs to the processor, r2 is the last row belonging to 3474 the this processor, and c1-c2 is range of indices of the local part of a 3475 vector suitable for applying the matrix to. This is an mxn matrix. In the 3476 common case of a square matrix, the row and column ranges are the same and 3477 the DIAGONAL part is also square. The remaining portion of the local 3478 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3479 3480 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3481 3482 You can call MatGetInfo() to get information on how effective the preallocation was; 3483 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3484 You can also run with the option -info and look for messages with the string 3485 malloc in them to see if additional memory allocation was needed. 3486 3487 Example usage: 3488 3489 Consider the following 8x8 matrix with 34 non-zero values, that is 3490 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3491 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3492 as follows: 3493 3494 .vb 3495 1 2 0 | 0 3 0 | 0 4 3496 Proc0 0 5 6 | 7 0 0 | 8 0 3497 9 0 10 | 11 0 0 | 12 0 3498 ------------------------------------- 3499 13 0 14 | 15 16 17 | 0 0 3500 Proc1 0 18 0 | 19 20 21 | 0 0 3501 0 0 0 | 22 23 0 | 24 0 3502 ------------------------------------- 3503 Proc2 25 26 27 | 0 0 28 | 29 0 3504 30 0 0 | 31 32 33 | 0 34 3505 .ve 3506 3507 This can be represented as a collection of submatrices as: 3508 3509 .vb 3510 A B C 3511 D E F 3512 G H I 3513 .ve 3514 3515 Where the submatrices A,B,C are owned by proc0, D,E,F are 3516 owned by proc1, G,H,I are owned by proc2. 3517 3518 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3519 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3520 The 'M','N' parameters are 8,8, and have the same values on all procs. 3521 3522 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3523 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3524 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
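   As a sketch, the corresponding preallocation call on proc0 for this example (using the per-row
   counts d_nnz and o_nnz derived below) would be:
.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* proc0 owns 3 rows */
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve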
3525 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3526 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3527 matrix, and [DF] as another SeqAIJ matrix. 3528 3529 When d_nz, o_nz parameters are specified, d_nz storage elements are 3530 allocated for every row of the local diagonal submatrix, and o_nz 3531 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3532 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3533 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3534 In this case, the values of d_nz,o_nz are: 3535 .vb 3536 proc0 : d_nz = 2, o_nz = 2 3537 proc1 : d_nz = 3, o_nz = 2 3538 proc2 : d_nz = 1, o_nz = 4 3539 .ve 3540 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3541 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3542 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3543 34 values. 3544 3545 When d_nnz, o_nnz parameters are specified, the storage is specified 3546 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3547 In the above case the values for d_nnz,o_nnz are: 3548 .vb 3549 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3550 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3551 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3552 .ve 3553 Here the space allocated is the sum of all the above values, i.e. 34, and 3554 hence preallocation is perfect. 3555 3556 Level: intermediate 3557 3558 .keywords: matrix, aij, compressed row, sparse, parallel 3559 3560 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3561 MPIAIJ, MatGetInfo() 3562 @*/ 3563 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3564 { 3565 PetscErrorCode ierr; 3566 3567 PetscFunctionBegin; 3568 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3569 PetscFunctionReturn(0); 3570 } 3571 3572 #undef __FUNCT__ 3573 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3574 /*@ 3575 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 3576 CSR format. 3577 3578 Collective on MPI_Comm 3579 3580 Input Parameters: 3581 + comm - MPI communicator 3582 . m - number of local rows (Cannot be PETSC_DECIDE) 3583 . n - This value should be the same as the local size used in creating the 3584 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3585 calculated if N is given) For square matrices n is almost always m. 3586 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3587 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3588 . i - row indices 3589 . j - column indices 3590 - a - matrix values 3591 3592 Output Parameter: 3593 . mat - the matrix 3594 3595 Level: intermediate 3596 3597 Notes: 3598 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3599 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3600 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3601 3602 The i and j indices are 0 based, and the i indices are offsets into the local j array.
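   As a sketch, process P0 in the example below (run on a communicator with two processes) would
   create its piece of the matrix with:
.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};   /* local CSR structure on P0 */
     PetscScalar v[] = {1.0,2.0,3.0};
     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&mat);CHKERRQ(ierr);
.ve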
3603 3604 The format which is used for the sparse matrix input, is equivalent to a 3605 row-major ordering.. i.e for the following matrix, the input data expected is 3606 as shown: 3607 3608 1 0 0 3609 2 0 3 P0 3610 ------- 3611 4 5 6 P1 3612 3613 Process0 [P0]: rows_owned=[0,1] 3614 i = {0,1,3} [size = nrow+1 = 2+1] 3615 j = {0,0,2} [size = nz = 6] 3616 v = {1,2,3} [size = nz = 6] 3617 3618 Process1 [P1]: rows_owned=[2] 3619 i = {0,3} [size = nrow+1 = 1+1] 3620 j = {0,1,2} [size = nz = 6] 3621 v = {4,5,6} [size = nz = 6] 3622 3623 .keywords: matrix, aij, compressed row, sparse, parallel 3624 3625 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3626 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3627 @*/ 3628 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3629 { 3630 PetscErrorCode ierr; 3631 3632 PetscFunctionBegin; 3633 if (i[0]) { 3634 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3635 } 3636 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3637 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3638 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3639 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3640 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3641 PetscFunctionReturn(0); 3642 } 3643 3644 #undef __FUNCT__ 3645 #define __FUNCT__ "MatCreateMPIAIJ" 3646 /*@C 3647 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3648 (the default parallel PETSc format). For good matrix assembly performance 3649 the user should preallocate the matrix storage by setting the parameters 3650 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3651 performance can be increased by more than a factor of 50. 3652 3653 Collective on MPI_Comm 3654 3655 Input Parameters: 3656 + comm - MPI communicator 3657 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3658 This value should be the same as the local size used in creating the 3659 y vector for the matrix-vector product y = Ax. 3660 . n - This value should be the same as the local size used in creating the 3661 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3662 calculated if N is given) For square matrices n is almost always m. 3663 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3664 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3665 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3666 (same value is used for all local rows) 3667 . d_nnz - array containing the number of nonzeros in the various rows of the 3668 DIAGONAL portion of the local submatrix (possibly different for each row) 3669 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3670 The size of this array is equal to the number of local rows, i.e 'm'. 3671 You must leave room for the diagonal entry even if it is zero. 3672 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3673 submatrix (same value is used for all local rows). 
3674 - o_nnz - array containing the number of nonzeros in the various rows of the 3675 OFF-DIAGONAL portion of the local submatrix (possibly different for 3676 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3677 structure. The size of this array is equal to the number 3678 of local rows, i.e 'm'. 3679 3680 Output Parameter: 3681 . A - the matrix 3682 3683 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3684 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3685 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3686 3687 Notes: 3688 If the *_nnz parameter is given then the *_nz parameter is ignored 3689 3690 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3691 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3692 storage requirements for this matrix. 3693 3694 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3695 processor than it must be used on all processors that share the object for 3696 that argument. 3697 3698 The user MUST specify either the local or global matrix dimensions 3699 (possibly both). 3700 3701 The parallel matrix is partitioned across processors such that the 3702 first m0 rows belong to process 0, the next m1 rows belong to 3703 process 1, the next m2 rows belong to process 2 etc.. where 3704 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3705 values corresponding to [m x N] submatrix. 3706 3707 The columns are logically partitioned with the n0 columns belonging 3708 to 0th partition, the next n1 columns belonging to the next 3709 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 3710 3711 The DIAGONAL portion of the local submatrix on any given processor 3712 is the submatrix corresponding to the rows and columns m,n 3713 corresponding to the given processor. i.e diagonal matrix on 3714 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3715 etc. The remaining portion of the local submatrix [m x (N-n)] 3716 constitute the OFF-DIAGONAL portion. The example below better 3717 illustrates this concept. 3718 3719 For a square global matrix we define each processor's diagonal portion 3720 to be its local rows and the corresponding columns (a square submatrix); 3721 each processor's off-diagonal portion encompasses the remainder of the 3722 local matrix (a rectangular submatrix). 3723 3724 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3725 3726 When calling this routine with a single process communicator, a matrix of 3727 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3728 type of communicator, use the construction mechanism: 3729 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3730 3731 By default, this format uses inodes (identical nodes) when possible. 3732 We search for consecutive rows with the same nonzero structure, thereby 3733 reusing matrix information to achieve increased efficiency. 3734 3735 Options Database Keys: 3736 + -mat_no_inode - Do not use inodes 3737 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3738 - -mat_aij_oneindex - Internally use indexing starting at 1 3739 rather than 0. Note that when calling MatSetValues(), 3740 the user still MUST index entries starting at 0! 3741 3742 3743 Example usage: 3744 3745 Consider the following 8x8 matrix with 34 non-zero values, that is 3746 assembled across 3 processors. 
Let's assume that proc0 owns 3 rows, 3747 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3748 as follows: 3749 3750 .vb 3751 1 2 0 | 0 3 0 | 0 4 3752 Proc0 0 5 6 | 7 0 0 | 8 0 3753 9 0 10 | 11 0 0 | 12 0 3754 ------------------------------------- 3755 13 0 14 | 15 16 17 | 0 0 3756 Proc1 0 18 0 | 19 20 21 | 0 0 3757 0 0 0 | 22 23 0 | 24 0 3758 ------------------------------------- 3759 Proc2 25 26 27 | 0 0 28 | 29 0 3760 30 0 0 | 31 32 33 | 0 34 3761 .ve 3762 3763 This can be represented as a collection of submatrices as: 3764 3765 .vb 3766 A B C 3767 D E F 3768 G H I 3769 .ve 3770 3771 Where the submatrices A,B,C are owned by proc0, D,E,F are 3772 owned by proc1, G,H,I are owned by proc2. 3773 3774 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3775 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3776 The 'M','N' parameters are 8,8, and have the same values on all procs. 3777 3778 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3779 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3780 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3781 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3782 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3783 matrix, and [DF] as another SeqAIJ matrix. 3784 3785 When d_nz, o_nz parameters are specified, d_nz storage elements are 3786 allocated for every row of the local diagonal submatrix, and o_nz 3787 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3788 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3789 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3790 In this case, the values of d_nz,o_nz are: 3791 .vb 3792 proc0 : d_nz = 2, o_nz = 2 3793 proc1 : d_nz = 3, o_nz = 2 3794 proc2 : d_nz = 1, o_nz = 4 3795 .ve 3796 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3797 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3798 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3799 34 values. 3800 3801 When d_nnz, o_nnz parameters are specified, the storage is specified 3802 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3803 In the above case the values for d_nnz,o_nnz are: 3804 .vb 3805 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3806 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3807 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3808 .ve 3809 Here the space allocated is the sum of all the above values, i.e. 34, and 3810 hence preallocation is perfect.
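   A sketch of the corresponding call on proc0 for this example (the other processes pass their own
   local sizes and count arrays):
.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* per-row counts for proc0 */
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve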
3811 3812 Level: intermediate 3813 3814 .keywords: matrix, aij, compressed row, sparse, parallel 3815 3816 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3817 MPIAIJ, MatCreateMPIAIJWithArrays() 3818 @*/ 3819 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3820 { 3821 PetscErrorCode ierr; 3822 PetscMPIInt size; 3823 3824 PetscFunctionBegin; 3825 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3826 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3827 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3828 if (size > 1) { 3829 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3830 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3831 } else { 3832 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3833 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3834 } 3835 PetscFunctionReturn(0); 3836 } 3837 3838 #undef __FUNCT__ 3839 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3840 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3841 { 3842 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 3843 3844 PetscFunctionBegin; 3845 *Ad = a->A; 3846 *Ao = a->B; 3847 *colmap = a->garray; 3848 PetscFunctionReturn(0); 3849 } 3850 3851 #undef __FUNCT__ 3852 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3853 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3854 { 3855 PetscErrorCode ierr; 3856 PetscInt i; 3857 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3858 3859 PetscFunctionBegin; 3860 if (coloring->ctype == IS_COLORING_GLOBAL) { 3861 ISColoringValue *allcolors,*colors; 3862 ISColoring ocoloring; 3863 3864 /* set coloring for diagonal portion */ 3865 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3866 3867 /* set coloring for off-diagonal portion */ 3868 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 3869 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3870 for (i=0; i<a->B->cmap->n; i++) { 3871 colors[i] = allcolors[a->garray[i]]; 3872 } 3873 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3874 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3875 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3876 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3877 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3878 ISColoringValue *colors; 3879 PetscInt *larray; 3880 ISColoring ocoloring; 3881 3882 /* set coloring for diagonal portion */ 3883 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3884 for (i=0; i<a->A->cmap->n; i++) { 3885 larray[i] = i + A->cmap->rstart; 3886 } 3887 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 3888 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3889 for (i=0; i<a->A->cmap->n; i++) { 3890 colors[i] = coloring->colors[larray[i]]; 3891 } 3892 ierr = PetscFree(larray);CHKERRQ(ierr); 3893 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3894 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3895 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3896 3897 /* set coloring for off-diagonal portion */ 3898 ierr = 
PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 3899 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 3900 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 3901 for (i=0; i<a->B->cmap->n; i++) { 3902 colors[i] = coloring->colors[larray[i]]; 3903 } 3904 ierr = PetscFree(larray);CHKERRQ(ierr); 3905 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3906 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3907 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 3908 } else { 3909 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3910 } 3911 3912 PetscFunctionReturn(0); 3913 } 3914 3915 #if defined(PETSC_HAVE_ADIC) 3916 #undef __FUNCT__ 3917 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 3918 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 3919 { 3920 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3921 PetscErrorCode ierr; 3922 3923 PetscFunctionBegin; 3924 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 3925 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 3926 PetscFunctionReturn(0); 3927 } 3928 #endif 3929 3930 #undef __FUNCT__ 3931 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3932 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3933 { 3934 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3935 PetscErrorCode ierr; 3936 3937 PetscFunctionBegin; 3938 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3939 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3940 PetscFunctionReturn(0); 3941 } 3942 3943 #undef __FUNCT__ 3944 #define __FUNCT__ "MatMerge" 3945 /*@ 3946 MatMerge - Creates a single large PETSc matrix by concatinating sequential 3947 matrices from each processor 3948 3949 Collective on MPI_Comm 3950 3951 Input Parameters: 3952 + comm - the communicators the parallel matrix will live on 3953 . inmat - the input sequential matrices 3954 . n - number of local columns (or PETSC_DECIDE) 3955 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3956 3957 Output Parameter: 3958 . outmat - the parallel matrix generated 3959 3960 Level: advanced 3961 3962 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
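   A minimal usage sketch (each process passes its own sequential piece; note that the input matrix
   is destroyed by this routine; the variable names are illustrative):
.vb
     ierr = MatMerge(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalmat);CHKERRQ(ierr);
.ve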
3963 3964 @*/ 3965 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3966 { 3967 PetscErrorCode ierr; 3968 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 3969 PetscInt *indx; 3970 PetscScalar *values; 3971 3972 PetscFunctionBegin; 3973 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3974 if (scall == MAT_INITIAL_MATRIX){ 3975 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 3976 if (n == PETSC_DECIDE){ 3977 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3978 } 3979 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3980 rstart -= m; 3981 3982 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3983 for (i=0;i<m;i++) { 3984 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3985 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3986 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 3987 } 3988 /* This routine will ONLY return MPIAIJ type matrix */ 3989 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3990 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3991 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3992 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3993 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3994 3995 } else if (scall == MAT_REUSE_MATRIX){ 3996 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 3997 } else { 3998 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3999 } 4000 4001 for (i=0;i<m;i++) { 4002 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4003 Ii = i + rstart; 4004 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4005 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4006 } 4007 ierr = MatDestroy(inmat);CHKERRQ(ierr); 4008 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4009 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4010 4011 PetscFunctionReturn(0); 4012 } 4013 4014 #undef __FUNCT__ 4015 #define __FUNCT__ "MatFileSplit" 4016 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4017 { 4018 PetscErrorCode ierr; 4019 PetscMPIInt rank; 4020 PetscInt m,N,i,rstart,nnz; 4021 size_t len; 4022 const PetscInt *indx; 4023 PetscViewer out; 4024 char *name; 4025 Mat B; 4026 const PetscScalar *values; 4027 4028 PetscFunctionBegin; 4029 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4030 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4031 /* Should this be the type of the diagonal block of A? 
*/ 4032 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4033 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4034 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4035 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4036 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4037 for (i=0;i<m;i++) { 4038 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4039 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4040 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4041 } 4042 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4043 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4044 4045 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4046 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4047 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4048 sprintf(name,"%s.%d",outfile,rank); 4049 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4050 ierr = PetscFree(name); 4051 ierr = MatView(B,out);CHKERRQ(ierr); 4052 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 4053 ierr = MatDestroy(B);CHKERRQ(ierr); 4054 PetscFunctionReturn(0); 4055 } 4056 4057 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 4058 #undef __FUNCT__ 4059 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4060 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4061 { 4062 PetscErrorCode ierr; 4063 Mat_Merge_SeqsToMPI *merge; 4064 PetscContainer container; 4065 4066 PetscFunctionBegin; 4067 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4068 if (container) { 4069 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4070 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4071 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4072 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4073 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4074 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4075 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4076 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4077 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4078 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4079 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4080 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4081 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4082 ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr); 4083 4084 ierr = PetscContainerDestroy(container);CHKERRQ(ierr); 4085 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4086 } 4087 ierr = PetscFree(merge);CHKERRQ(ierr); 4088 4089 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4090 PetscFunctionReturn(0); 4091 } 4092 4093 #include "../src/mat/utils/freespace.h" 4094 #include "petscbt.h" 4095 4096 #undef __FUNCT__ 4097 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4098 /*@C 4099 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4100 matrices from each processor 4101 4102 Collective on MPI_Comm 4103 4104 Input Parameters: 4105 + comm - the communicators the parallel matrix will live on 4106 . seqmat - the input sequential matrices 4107 . m - number of local rows (or PETSC_DECIDE) 4108 . n - number of local columns (or PETSC_DECIDE) 4109 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4110 4111 Output Parameter: 4112 . mpimat - the parallel matrix generated 4113 4114 Level: advanced 4115 4116 Notes: 4117 The dimensions of the sequential matrix in each processor MUST be the same. 
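   A sketch of the usual driver call (MatMerge_SeqsToMPI(), defined further below), which performs
   the symbolic phase on the first call and then this numeric assembly:
.vb
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve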
4118 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4119 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4120 @*/ 4121 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4122 { 4123 PetscErrorCode ierr; 4124 MPI_Comm comm=((PetscObject)mpimat)->comm; 4125 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4126 PetscMPIInt size,rank,taga,*len_s; 4127 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4128 PetscInt proc,m; 4129 PetscInt **buf_ri,**buf_rj; 4130 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4131 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4132 MPI_Request *s_waits,*r_waits; 4133 MPI_Status *status; 4134 MatScalar *aa=a->a; 4135 MatScalar **abuf_r,*ba_i; 4136 Mat_Merge_SeqsToMPI *merge; 4137 PetscContainer container; 4138 4139 PetscFunctionBegin; 4140 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4141 4142 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4143 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4144 4145 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4146 if (container) { 4147 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4148 } 4149 bi = merge->bi; 4150 bj = merge->bj; 4151 buf_ri = merge->buf_ri; 4152 buf_rj = merge->buf_rj; 4153 4154 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4155 owners = merge->rowmap->range; 4156 len_s = merge->len_s; 4157 4158 /* send and recv matrix values */ 4159 /*-----------------------------*/ 4160 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4161 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4162 4163 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4164 for (proc=0,k=0; proc<size; proc++){ 4165 if (!len_s[proc]) continue; 4166 i = owners[proc]; 4167 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4168 k++; 4169 } 4170 4171 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4172 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4173 ierr = PetscFree(status);CHKERRQ(ierr); 4174 4175 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4176 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4177 4178 /* insert mat values of mpimat */ 4179 /*----------------------------*/ 4180 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4181 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4182 4183 for (k=0; k<merge->nrecv; k++){ 4184 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4185 nrows = *(buf_ri_k[k]); 4186 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4187 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4188 } 4189 4190 /* set values of ba */ 4191 m = merge->rowmap->n; 4192 for (i=0; i<m; i++) { 4193 arow = owners[rank] + i; 4194 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4195 bnzi = bi[i+1] - bi[i]; 4196 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4197 4198 /* add local non-zero vals of this proc's seqmat into ba */ 4199 anzi = ai[arow+1] - ai[arow]; 4200 aj = a->j + ai[arow]; 4201 aa = a->a + ai[arow]; 4202 nextaj = 0; 4203 for (j=0; nextaj<anzi; j++){ 4204 if (*(bj_i 
+ j) == aj[nextaj]){ /* bcol == acol */ 4205 ba_i[j] += aa[nextaj++]; 4206 } 4207 } 4208 4209 /* add received vals into ba */ 4210 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4211 /* i-th row */ 4212 if (i == *nextrow[k]) { 4213 anzi = *(nextai[k]+1) - *nextai[k]; 4214 aj = buf_rj[k] + *(nextai[k]); 4215 aa = abuf_r[k] + *(nextai[k]); 4216 nextaj = 0; 4217 for (j=0; nextaj<anzi; j++){ 4218 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4219 ba_i[j] += aa[nextaj++]; 4220 } 4221 } 4222 nextrow[k]++; nextai[k]++; 4223 } 4224 } 4225 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4226 } 4227 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4228 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4229 4230 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4231 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4232 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4233 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4234 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4235 PetscFunctionReturn(0); 4236 } 4237 4238 #undef __FUNCT__ 4239 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4240 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4241 { 4242 PetscErrorCode ierr; 4243 Mat B_mpi; 4244 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4245 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4246 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4247 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4248 PetscInt len,proc,*dnz,*onz; 4249 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4250 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4251 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4252 MPI_Status *status; 4253 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4254 PetscBT lnkbt; 4255 Mat_Merge_SeqsToMPI *merge; 4256 PetscContainer container; 4257 4258 PetscFunctionBegin; 4259 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4260 4261 /* make sure it is a PETSc comm */ 4262 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4263 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4264 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4265 4266 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4267 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4268 4269 /* determine row ownership */ 4270 /*---------------------------------------------------------*/ 4271 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4272 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4273 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4274 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4275 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4276 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4277 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4278 4279 m = merge->rowmap->n; 4280 M = merge->rowmap->N; 4281 owners = merge->rowmap->range; 4282 4283 /* determine the number of messages to send, their lengths */ 4284 /*---------------------------------------------------------*/ 4285 len_s = merge->len_s; 4286 4287 len = 0; /* length of buf_si[] */ 4288 merge->nsend = 0; 4289 for (proc=0; proc<size; proc++){ 4290 len_si[proc] = 0; 4291 if (proc == rank){ 4292 len_s[proc] = 0; 4293 } else { 4294 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4295 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4296 } 4297 if (len_s[proc]) { 4298 merge->nsend++; 4299 nrows = 0; 4300 for (i=owners[proc]; i<owners[proc+1]; i++){ 4301 if (ai[i+1] > ai[i]) nrows++; 4302 } 4303 len_si[proc] = 2*(nrows+1); 4304 len += len_si[proc]; 4305 } 4306 } 4307 4308 /* determine the number and length of messages to receive for ij-structure */ 4309 /*-------------------------------------------------------------------------*/ 4310 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4311 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4312 4313 /* post the Irecv of j-structure */ 4314 /*-------------------------------*/ 4315 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4316 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4317 4318 /* post the Isend of j-structure */ 4319 /*--------------------------------*/ 4320 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4321 4322 for (proc=0, k=0; proc<size; proc++){ 4323 if (!len_s[proc]) continue; 4324 i = owners[proc]; 4325 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4326 k++; 4327 } 4328 4329 /* receives and sends of j-structure are complete */ 4330 /*------------------------------------------------*/ 4331 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4332 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4333 4334 /* send and recv i-structure */ 4335 /*---------------------------*/ 4336 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4337 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4338 4339 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4340 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4341 for (proc=0,k=0; proc<size; proc++){ 4342 if (!len_s[proc]) continue; 4343 /* form outgoing message for i-structure: 4344 buf_si[0]: nrows to be sent 4345 [1:nrows]: row index (global) 4346 [nrows+1:2*nrows+1]: i-structure index 4347 */ 4348 /*-------------------------------------------*/ 4349 nrows = len_si[proc]/2 - 1; 4350 buf_si_i = buf_si + nrows+1; 4351 buf_si[0] = nrows; 4352 buf_si_i[0] = 0; 4353 nrows = 0; 4354 for (i=owners[proc]; i<owners[proc+1]; i++){ 4355 anzi = ai[i+1] - ai[i]; 4356 if (anzi) { 4357 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4358 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4359 nrows++; 4360 } 4361 } 4362 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4363 k++; 4364 buf_si += len_si[proc]; 4365 } 4366 4367 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4368 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4369 4370 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4371 for (i=0; i<merge->nrecv; i++){ 4372 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4373 } 4374 4375 ierr = PetscFree(len_si);CHKERRQ(ierr); 4376 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4377 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4378 ierr = 
PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4379 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4380 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4381 ierr = PetscFree(status);CHKERRQ(ierr); 4382 4383 /* compute a local seq matrix in each processor */ 4384 /*----------------------------------------------*/ 4385 /* allocate bi array and free space for accumulating nonzero column info */ 4386 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4387 bi[0] = 0; 4388 4389 /* create and initialize a linked list */ 4390 nlnk = N+1; 4391 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4392 4393 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4394 len = 0; 4395 len = ai[owners[rank+1]] - ai[owners[rank]]; 4396 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4397 current_space = free_space; 4398 4399 /* determine symbolic info for each local row */ 4400 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4401 4402 for (k=0; k<merge->nrecv; k++){ 4403 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4404 nrows = *buf_ri_k[k]; 4405 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4406 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 4407 } 4408 4409 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4410 len = 0; 4411 for (i=0;i<m;i++) { 4412 bnzi = 0; 4413 /* add local non-zero cols of this proc's seqmat into lnk */ 4414 arow = owners[rank] + i; 4415 anzi = ai[arow+1] - ai[arow]; 4416 aj = a->j + ai[arow]; 4417 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4418 bnzi += nlnk; 4419 /* add received col data into lnk */ 4420 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4421 if (i == *nextrow[k]) { /* i-th row */ 4422 anzi = *(nextai[k]+1) - *nextai[k]; 4423 aj = buf_rj[k] + *nextai[k]; 4424 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4425 bnzi += nlnk; 4426 nextrow[k]++; nextai[k]++; 4427 } 4428 } 4429 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4430 4431 /* if free space is not available, make more free space */ 4432 if (current_space->local_remaining<bnzi) { 4433 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4434 nspacedouble++; 4435 } 4436 /* copy data into free space, then initialize lnk */ 4437 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4438 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4439 4440 current_space->array += bnzi; 4441 current_space->local_used += bnzi; 4442 current_space->local_remaining -= bnzi; 4443 4444 bi[i+1] = bi[i] + bnzi; 4445 } 4446 4447 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4448 4449 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4450 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4451 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4452 4453 /* create symbolic parallel matrix B_mpi */ 4454 /*---------------------------------------*/ 4455 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4456 if (n==PETSC_DECIDE) { 4457 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4458 } else { 4459 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4460 } 4461 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4462 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4463 ierr =
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4464 4465 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4466 B_mpi->assembled = PETSC_FALSE; 4467 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4468 merge->bi = bi; 4469 merge->bj = bj; 4470 merge->buf_ri = buf_ri; 4471 merge->buf_rj = buf_rj; 4472 merge->coi = PETSC_NULL; 4473 merge->coj = PETSC_NULL; 4474 merge->owners_co = PETSC_NULL; 4475 4476 /* attach the supporting struct to B_mpi for reuse */ 4477 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4478 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4479 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4480 *mpimat = B_mpi; 4481 4482 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4483 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4484 PetscFunctionReturn(0); 4485 } 4486 4487 #undef __FUNCT__ 4488 #define __FUNCT__ "MatMerge_SeqsToMPI" 4489 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4490 { 4491 PetscErrorCode ierr; 4492 4493 PetscFunctionBegin; 4494 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4495 if (scall == MAT_INITIAL_MATRIX){ 4496 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4497 } 4498 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4499 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4500 PetscFunctionReturn(0); 4501 } 4502 4503 #undef __FUNCT__ 4504 #define __FUNCT__ "MatGetLocalMat" 4505 /*@ 4506 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 4507 4508 Not Collective 4509 4510 Input Parameters: 4511 + A - the matrix 4512 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4513 4514 Output Parameter: 4515 . 
A_loc - the local sequential matrix generated 4516 4517 Level: developer 4518 4519 @*/ 4520 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4521 { 4522 PetscErrorCode ierr; 4523 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4524 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4525 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4526 MatScalar *aa=a->a,*ba=b->a,*cam; 4527 PetscScalar *ca; 4528 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4529 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4530 4531 PetscFunctionBegin; 4532 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4533 if (scall == MAT_INITIAL_MATRIX){ 4534 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4535 ci[0] = 0; 4536 for (i=0; i<am; i++){ 4537 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4538 } 4539 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4540 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4541 k = 0; 4542 for (i=0; i<am; i++) { 4543 ncols_o = bi[i+1] - bi[i]; 4544 ncols_d = ai[i+1] - ai[i]; 4545 /* off-diagonal portion of A */ 4546 for (jo=0; jo<ncols_o; jo++) { 4547 col = cmap[*bj]; 4548 if (col >= cstart) break; 4549 cj[k] = col; bj++; 4550 ca[k++] = *ba++; 4551 } 4552 /* diagonal portion of A */ 4553 for (j=0; j<ncols_d; j++) { 4554 cj[k] = cstart + *aj++; 4555 ca[k++] = *aa++; 4556 } 4557 /* off-diagonal portion of A */ 4558 for (j=jo; j<ncols_o; j++) { 4559 cj[k] = cmap[*bj++]; 4560 ca[k++] = *ba++; 4561 } 4562 } 4563 /* put together the new matrix */ 4564 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4565 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4566 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4567 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4568 mat->free_a = PETSC_TRUE; 4569 mat->free_ij = PETSC_TRUE; 4570 mat->nonew = 0; 4571 } else if (scall == MAT_REUSE_MATRIX){ 4572 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4573 ci = mat->i; cj = mat->j; cam = mat->a; 4574 for (i=0; i<am; i++) { 4575 /* off-diagonal portion of A */ 4576 ncols_o = bi[i+1] - bi[i]; 4577 for (jo=0; jo<ncols_o; jo++) { 4578 col = cmap[*bj]; 4579 if (col >= cstart) break; 4580 *cam++ = *ba++; bj++; 4581 } 4582 /* diagonal portion of A */ 4583 ncols_d = ai[i+1] - ai[i]; 4584 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4585 /* off-diagonal portion of A */ 4586 for (j=jo; j<ncols_o; j++) { 4587 *cam++ = *ba++; bj++; 4588 } 4589 } 4590 } else { 4591 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4592 } 4593 4594 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4595 PetscFunctionReturn(0); 4596 } 4597 4598 #undef __FUNCT__ 4599 #define __FUNCT__ "MatGetLocalMatCondensed" 4600 /*@C 4601 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 4602 4603 Not Collective 4604 4605 Input Parameters: 4606 + A - the matrix 4607 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4608 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4609 4610 Output Parameter: 4611 . 
A_loc - the local sequential matrix generated 4612 4613 Level: developer 4614 4615 @*/ 4616 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4617 { 4618 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4619 PetscErrorCode ierr; 4620 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4621 IS isrowa,iscola; 4622 Mat *aloc; 4623 4624 PetscFunctionBegin; 4625 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4626 if (!row){ 4627 start = A->rmap->rstart; end = A->rmap->rend; 4628 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4629 } else { 4630 isrowa = *row; 4631 } 4632 if (!col){ 4633 start = A->cmap->rstart; 4634 cmap = a->garray; 4635 nzA = a->A->cmap->n; 4636 nzB = a->B->cmap->n; 4637 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4638 ncols = 0; 4639 for (i=0; i<nzB; i++) { 4640 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4641 else break; 4642 } 4643 imark = i; 4644 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4645 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4646 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4647 } else { 4648 iscola = *col; 4649 } 4650 if (scall != MAT_INITIAL_MATRIX){ 4651 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4652 aloc[0] = *A_loc; 4653 } 4654 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4655 *A_loc = aloc[0]; 4656 ierr = PetscFree(aloc);CHKERRQ(ierr); 4657 if (!row){ 4658 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 4659 } 4660 if (!col){ 4661 ierr = ISDestroy(iscola);CHKERRQ(ierr); 4662 } 4663 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4664 PetscFunctionReturn(0); 4665 } 4666 4667 #undef __FUNCT__ 4668 #define __FUNCT__ "MatGetBrowsOfAcols" 4669 /*@C 4670 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4671 4672 Collective on Mat 4673 4674 Input Parameters: 4675 + A,B - the matrices in mpiaij format 4676 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4677 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 4678 4679 Output Parameter: 4680 + rowb, colb - index sets of rows and columns of B to extract 4681 . 
brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 4682 - B_seq - the sequential matrix generated 4683 4684 Level: developer 4685 4686 @*/ 4687 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 4688 { 4689 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4690 PetscErrorCode ierr; 4691 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4692 IS isrowb,iscolb; 4693 Mat *bseq; 4694 4695 PetscFunctionBegin; 4696 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4697 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4698 } 4699 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4700 4701 if (scall == MAT_INITIAL_MATRIX){ 4702 start = A->cmap->rstart; 4703 cmap = a->garray; 4704 nzA = a->A->cmap->n; 4705 nzB = a->B->cmap->n; 4706 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4707 ncols = 0; 4708 for (i=0; i<nzB; i++) { /* row < local row index */ 4709 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4710 else break; 4711 } 4712 imark = i; 4713 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4714 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4715 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4716 *brstart = imark; 4717 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4718 } else { 4719 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4720 isrowb = *rowb; iscolb = *colb; 4721 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4722 bseq[0] = *B_seq; 4723 } 4724 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4725 *B_seq = bseq[0]; 4726 ierr = PetscFree(bseq);CHKERRQ(ierr); 4727 if (!rowb){ 4728 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 4729 } else { 4730 *rowb = isrowb; 4731 } 4732 if (!colb){ 4733 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 4734 } else { 4735 *colb = iscolb; 4736 } 4737 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4738 PetscFunctionReturn(0); 4739 } 4740 4741 #undef __FUNCT__ 4742 #define __FUNCT__ "MatGetBrowsOfAoCols" 4743 /*@C 4744 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4745 of the OFF-DIAGONAL portion of local A 4746 4747 Collective on Mat 4748 4749 Input Parameters: 4750 + A,B - the matrices in mpiaij format 4751 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4752 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 4753 . 
startsj_r - similar to startsj for receives 4754 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 4755 4756 Output Parameter: 4757 + B_oth - the sequential matrix generated 4758 4759 Level: developer 4760 4761 @*/ 4762 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4763 { 4764 VecScatter_MPI_General *gen_to,*gen_from; 4765 PetscErrorCode ierr; 4766 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4767 Mat_SeqAIJ *b_oth; 4768 VecScatter ctx=a->Mvctx; 4769 MPI_Comm comm=((PetscObject)ctx)->comm; 4770 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4771 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4772 PetscScalar *rvalues,*svalues; 4773 MatScalar *b_otha,*bufa,*bufA; 4774 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4775 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 4776 MPI_Status *sstatus,rstatus; 4777 PetscMPIInt jj; 4778 PetscInt *cols,sbs,rbs; 4779 PetscScalar *vals; 4780 4781 PetscFunctionBegin; 4782 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 4783 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4784 } 4785 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4786 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4787 4788 gen_to = (VecScatter_MPI_General*)ctx->todata; 4789 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4790 rvalues = gen_from->values; /* holds the length of receiving row */ 4791 svalues = gen_to->values; /* holds the length of sending row */ 4792 nrecvs = gen_from->n; 4793 nsends = gen_to->n; 4794 4795 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 4796 srow = gen_to->indices; /* local row index to be sent */ 4797 sstarts = gen_to->starts; 4798 sprocs = gen_to->procs; 4799 sstatus = gen_to->sstatus; 4800 sbs = gen_to->bs; 4801 rstarts = gen_from->starts; 4802 rprocs = gen_from->procs; 4803 rbs = gen_from->bs; 4804 4805 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4806 if (scall == MAT_INITIAL_MATRIX){ 4807 /* i-array */ 4808 /*---------*/ 4809 /* post receives */ 4810 for (i=0; i<nrecvs; i++){ 4811 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4812 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4813 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4814 } 4815 4816 /* pack the outgoing message */ 4817 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 4818 sstartsj[0] = 0; rstartsj[0] = 0; 4819 len = 0; /* total length of j or a array to be sent */ 4820 k = 0; 4821 for (i=0; i<nsends; i++){ 4822 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4823 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4824 for (j=0; j<nrows; j++) { 4825 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4826 for (l=0; l<sbs; l++){ 4827 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 4828 rowlen[j*sbs+l] = ncols; 4829 len += ncols; 4830 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 4831 } 4832 k++; 4833 } 4834 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4835 sstartsj[i+1] = len; /* starting point of (i+1)-th 
outgoing msg in bufj and bufa */ 4836 } 4837 /* recvs and sends of i-array are completed */ 4838 i = nrecvs; 4839 while (i--) { 4840 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4841 } 4842 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4843 4844 /* allocate buffers for sending j and a arrays */ 4845 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 4846 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 4847 4848 /* create i-array of B_oth */ 4849 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 4850 b_othi[0] = 0; 4851 len = 0; /* total length of j or a array to be received */ 4852 k = 0; 4853 for (i=0; i<nrecvs; i++){ 4854 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4855 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4856 for (j=0; j<nrows; j++) { 4857 b_othi[k+1] = b_othi[k] + rowlen[j]; 4858 len += rowlen[j]; k++; 4859 } 4860 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4861 } 4862 4863 /* allocate space for j and a arrays of B_oth */ 4864 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 4865 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 4866 4867 /* j-array */ 4868 /*---------*/ 4869 /* post receives of j-array */ 4870 for (i=0; i<nrecvs; i++){ 4871 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4872 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4873 } 4874 4875 /* pack the outgoing message j-array */ 4876 k = 0; 4877 for (i=0; i<nsends; i++){ 4878 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4879 bufJ = bufj+sstartsj[i]; 4880 for (j=0; j<nrows; j++) { 4881 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4882 for (ll=0; ll<sbs; ll++){ 4883 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4884 for (l=0; l<ncols; l++){ 4885 *bufJ++ = cols[l]; 4886 } 4887 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 4888 } 4889 } 4890 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4891 } 4892 4893 /* recvs and sends of j-array are completed */ 4894 i = nrecvs; 4895 while (i--) { 4896 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4897 } 4898 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4899 } else if (scall == MAT_REUSE_MATRIX){ 4900 sstartsj = *startsj; 4901 rstartsj = *startsj_r; 4902 bufa = *bufa_ptr; 4903 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4904 b_otha = b_oth->a; 4905 } else { 4906 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4907 } 4908 4909 /* a-array */ 4910 /*---------*/ 4911 /* post receives of a-array */ 4912 for (i=0; i<nrecvs; i++){ 4913 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4914 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4915 } 4916 4917 /* pack the outgoing message a-array */ 4918 k = 0; 4919 for (i=0; i<nsends; i++){ 4920 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4921 bufA = bufa+sstartsj[i]; 4922 for (j=0; j<nrows; j++) { 4923 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4924 for (ll=0; ll<sbs; ll++){ 4925 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4926 for (l=0; l<ncols; l++){ 4927 *bufA++ = vals[l]; 4928 } 4929 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 4930 } 4931 } 4932 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4933 } 4934 /* recvs and sends of a-array are completed */ 4935 i = nrecvs; 4936 while (i--) { 4937 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4938 } 4939 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4940 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4941 4942 if (scall == MAT_INITIAL_MATRIX){ 4943 /* put together the new matrix */ 4944 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4945 4946 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4947 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4948 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 4949 b_oth->free_a = PETSC_TRUE; 4950 b_oth->free_ij = PETSC_TRUE; 4951 b_oth->nonew = 0; 4952 4953 ierr = PetscFree(bufj);CHKERRQ(ierr); 4954 if (!startsj || !bufa_ptr){ 4955 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4956 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4957 } else { 4958 *startsj = sstartsj; 4959 *startsj_r = rstartsj; 4960 *bufa_ptr = bufa; 4961 } 4962 } 4963 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4964 PetscFunctionReturn(0); 4965 } 4966 4967 #undef __FUNCT__ 4968 #define __FUNCT__ "MatGetCommunicationStructs" 4969 /*@C 4970 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4971 4972 Not Collective 4973 4974 Input Parameters: 4975 . A - The matrix in mpiaij format 4976 4977 Output Parameter: 4978 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4979 . 
colmap - A map from global column index to local index into lvec 4980 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4981 4982 Level: developer 4983 4984 @*/ 4985 #if defined (PETSC_USE_CTABLE) 4986 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4987 #else 4988 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4989 #endif 4990 { 4991 Mat_MPIAIJ *a; 4992 4993 PetscFunctionBegin; 4994 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4995 PetscValidPointer(lvec, 2); 4996 PetscValidPointer(colmap, 3); 4997 PetscValidPointer(multScatter, 4); 4998 a = (Mat_MPIAIJ *) A->data; 4999 if (lvec) *lvec = a->lvec; 5000 if (colmap) *colmap = a->colmap; 5001 if (multScatter) *multScatter = a->Mvctx; 5002 PetscFunctionReturn(0); 5003 } 5004 5005 EXTERN_C_BEGIN 5006 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5007 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5008 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5009 EXTERN_C_END 5010 5011 #undef __FUNCT__ 5012 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5013 /* 5014 Computes (B'*A')' since computing B*A directly is untenable 5015 5016 n p p 5017 ( ) ( ) ( ) 5018 m ( A ) * n ( B ) = m ( C ) 5019 ( ) ( ) ( ) 5020 5021 */ 5022 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5023 { 5024 PetscErrorCode ierr; 5025 Mat At,Bt,Ct; 5026 5027 PetscFunctionBegin; 5028 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5029 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5030 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5031 ierr = MatDestroy(At);CHKERRQ(ierr); 5032 ierr = MatDestroy(Bt);CHKERRQ(ierr); 5033 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5034 ierr = MatDestroy(Ct);CHKERRQ(ierr); 5035 PetscFunctionReturn(0); 5036 } 5037 5038 #undef __FUNCT__ 5039 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5040 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5041 { 5042 PetscErrorCode ierr; 5043 PetscInt m=A->rmap->n,n=B->cmap->n; 5044 Mat Cmat; 5045 5046 PetscFunctionBegin; 5047 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5048 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5049 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5050 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5051 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5052 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5053 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5054 *C = Cmat; 5055 PetscFunctionReturn(0); 5056 } 5057 5058 /* ----------------------------------------------------------------*/ 5059 #undef __FUNCT__ 5060 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5061 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5062 { 5063 PetscErrorCode ierr; 5064 5065 PetscFunctionBegin; 5066 if (scall == MAT_INITIAL_MATRIX){ 5067 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5068 } 5069 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5070 PetscFunctionReturn(0); 5071 } 
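/*
   Usage sketch for the MATMPIDENSE * MATMPIAIJ product implemented by the three routines above.
   They form C = A*B (A dense, B sparse) indirectly, as Ct = Bt*At followed by a transpose of Ct,
   since the product is not formed directly for this type pair.  Callers reach this code through
   the generic MatMatMult() interface, via the "MatMatMult_mpidense_mpiaij_C" function that
   MatCreate_MPIAIJ() below composes onto every MPIAIJ matrix.  The fragment below is a minimal
   sketch (not part of this file's API), assuming A (MATMPIDENSE) and B (MATMPIAIJ) are already
   assembled and satisfy A->cmap->n == B->rmap->n on each process, the check done in
   MatMatMultSymbolic_MPIDense_MPIAIJ():

       Mat            A,B,C;
       PetscErrorCode ierr;
       ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
       ... change numerical values (not the nonzero pattern) of A or B ...
       ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
       ierr = MatDestroy(C);CHKERRQ(ierr);
*/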
5072 5073 EXTERN_C_BEGIN 5074 #if defined(PETSC_HAVE_MUMPS) 5075 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5076 #endif 5077 #if defined(PETSC_HAVE_PASTIX) 5078 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5079 #endif 5080 #if defined(PETSC_HAVE_SUPERLU_DIST) 5081 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5082 #endif 5083 #if defined(PETSC_HAVE_SPOOLES) 5084 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5085 #endif 5086 EXTERN_C_END 5087 5088 /*MC 5089 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5090 5091 Options Database Keys: 5092 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5093 5094 Level: beginner 5095 5096 .seealso: MatCreateMPIAIJ() 5097 M*/ 5098 5099 EXTERN_C_BEGIN 5100 #undef __FUNCT__ 5101 #define __FUNCT__ "MatCreate_MPIAIJ" 5102 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 5103 { 5104 Mat_MPIAIJ *b; 5105 PetscErrorCode ierr; 5106 PetscMPIInt size; 5107 5108 PetscFunctionBegin; 5109 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5110 5111 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5112 B->data = (void*)b; 5113 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5114 B->rmap->bs = 1; 5115 B->assembled = PETSC_FALSE; 5116 B->mapping = 0; 5117 5118 B->insertmode = NOT_SET_VALUES; 5119 b->size = size; 5120 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5121 5122 /* build cache for off array entries formed */ 5123 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5124 b->donotstash = PETSC_FALSE; 5125 b->colmap = 0; 5126 b->garray = 0; 5127 b->roworiented = PETSC_TRUE; 5128 5129 /* stuff used for matrix vector multiply */ 5130 b->lvec = PETSC_NULL; 5131 b->Mvctx = PETSC_NULL; 5132 5133 /* stuff for MatGetRow() */ 5134 b->rowindices = 0; 5135 b->rowvalues = 0; 5136 b->getrowactive = PETSC_FALSE; 5137 5138 #if defined(PETSC_HAVE_SPOOLES) 5139 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5140 "MatGetFactor_mpiaij_spooles", 5141 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5142 #endif 5143 #if defined(PETSC_HAVE_MUMPS) 5144 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5145 "MatGetFactor_aij_mumps", 5146 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5147 #endif 5148 #if defined(PETSC_HAVE_PASTIX) 5149 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5150 "MatGetFactor_mpiaij_pastix", 5151 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5152 #endif 5153 #if defined(PETSC_HAVE_SUPERLU_DIST) 5154 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5155 "MatGetFactor_mpiaij_superlu_dist", 5156 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5157 #endif 5158 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5159 "MatStoreValues_MPIAIJ", 5160 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5161 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5162 "MatRetrieveValues_MPIAIJ", 5163 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5164 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5165 "MatGetDiagonalBlock_MPIAIJ", 5166 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5167 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5168 "MatIsTranspose_MPIAIJ", 
5169 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5170 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5171 "MatMPIAIJSetPreallocation_MPIAIJ", 5172 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5173 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5174 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5175 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5176 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5177 "MatDiagonalScaleLocal_MPIAIJ", 5178 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5179 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5180 "MatConvert_MPIAIJ_MPIAIJPERM", 5181 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5182 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5183 "MatConvert_MPIAIJ_MPIAIJCRL", 5184 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5185 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5186 "MatConvert_MPIAIJ_MPISBAIJ", 5187 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5188 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5189 "MatMatMult_MPIDense_MPIAIJ", 5190 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5191 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5192 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5193 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5194 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5195 "MatMatMultNumeric_MPIDense_MPIAIJ", 5196 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5197 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5198 PetscFunctionReturn(0); 5199 } 5200 EXTERN_C_END 5201 5202 #undef __FUNCT__ 5203 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5204 /*@ 5205 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5206 and "off-diagonal" part of the matrix in CSR format. 5207 5208 Collective on MPI_Comm 5209 5210 Input Parameters: 5211 + comm - MPI communicator 5212 . m - number of local rows (Cannot be PETSC_DECIDE) 5213 . n - This value should be the same as the local size used in creating the 5214 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5215 calculated if N is given) For square matrices n is almost always m. 5216 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5217 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5218 . i - row indices for "diagonal" portion of matrix 5219 . j - column indices 5220 . a - matrix values 5221 . oi - row indices for "off-diagonal" portion of matrix 5222 . oj - column indices 5223 - oa - matrix values 5224 5225 Output Parameter: 5226 . mat - the matrix 5227 5228 Level: advanced 5229 5230 Notes: 5231 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 5232 5233 The i and j indices are 0 based 5234 5235 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5236 5237 This sets local rows and cannot be used to set off-processor values. 5238 5239 You cannot later use MatSetValues() to change values in this matrix. 
5240 5241 .keywords: matrix, aij, compressed row, sparse, parallel 5242 5243 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5244 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5245 @*/ 5246 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5247 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5248 { 5249 PetscErrorCode ierr; 5250 Mat_MPIAIJ *maij; 5251 5252 PetscFunctionBegin; 5253 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5254 if (i[0]) { 5255 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5256 } 5257 if (oi[0]) { 5258 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5259 } 5260 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5261 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5262 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5263 maij = (Mat_MPIAIJ*) (*mat)->data; 5264 maij->donotstash = PETSC_TRUE; 5265 (*mat)->preallocated = PETSC_TRUE; 5266 5267 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5268 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5269 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5270 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5271 5272 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5273 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5274 5275 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5276 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5277 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5278 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5279 5280 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5281 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5282 PetscFunctionReturn(0); 5283 } 5284 5285 /* 5286 Special version for direct calls from Fortran 5287 */ 5288 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5289 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5290 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5291 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5292 #endif 5293 5294 /* Change these macros so can be used in void function */ 5295 #undef CHKERRQ 5296 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5297 #undef SETERRQ2 5298 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5299 #undef SETERRQ 5300 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5301 5302 EXTERN_C_BEGIN 5303 #undef __FUNCT__ 5304 #define __FUNCT__ "matsetvaluesmpiaij_" 5305 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5306 { 5307 Mat mat = *mmat; 5308 PetscInt m = *mm, n = *mn; 5309 InsertMode addv = *maddv; 5310 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5311 PetscScalar value; 5312 PetscErrorCode ierr; 5313 5314 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5315 if (mat->insertmode == NOT_SET_VALUES) { 5316 mat->insertmode = addv; 5317 } 5318 #if defined(PETSC_USE_DEBUG) 5319 else if (mat->insertmode != addv) { 5320 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5321 } 5322 #endif 5323 { 5324 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5325 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5326 PetscBool roworiented = aij->roworiented; 5327 5328 /* Some Variables required in the macro */ 5329 Mat A = aij->A; 5330 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5331 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5332 MatScalar *aa = a->a; 5333 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5334 Mat B = aij->B; 5335 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5336 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5337 MatScalar *ba = b->a; 5338 5339 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5340 PetscInt nonew = a->nonew; 5341 MatScalar *ap1,*ap2; 5342 5343 PetscFunctionBegin; 5344 for (i=0; i<m; i++) { 5345 if (im[i] < 0) continue; 5346 #if defined(PETSC_USE_DEBUG) 5347 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5348 #endif 5349 if (im[i] >= rstart && im[i] < rend) { 5350 row = im[i] - rstart; 5351 lastcol1 = -1; 5352 rp1 = aj + ai[row]; 5353 ap1 = aa + ai[row]; 5354 rmax1 = aimax[row]; 5355 nrow1 = ailen[row]; 5356 low1 = 0; 5357 high1 = nrow1; 5358 lastcol2 = -1; 5359 rp2 = bj + bi[row]; 5360 ap2 = ba + bi[row]; 5361 rmax2 = bimax[row]; 5362 nrow2 = bilen[row]; 5363 low2 = 0; 5364 high2 = nrow2; 5365 5366 for (j=0; j<n; j++) { 5367 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5368 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5369 if (in[j] >= cstart && in[j] < cend){ 5370 col = in[j] - cstart; 5371 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5372 } else if (in[j] < 0) continue; 5373 #if defined(PETSC_USE_DEBUG) 5374 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5375 #endif 5376 else { 5377 if (mat->was_assembled) { 5378 if (!aij->colmap) { 5379 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5380 } 5381 #if defined (PETSC_USE_CTABLE) 5382 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5383 col--; 5384 #else 5385 col = aij->colmap[in[j]] - 1; 5386 #endif 5387 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5388 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5389 col = in[j]; 5390 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5391 B = aij->B; 5392 b = (Mat_SeqAIJ*)B->data; 5393 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5394 rp2 = bj + bi[row]; 5395 ap2 = ba + bi[row]; 5396 rmax2 = bimax[row]; 5397 nrow2 = bilen[row]; 5398 low2 = 0; 5399 high2 = nrow2; 5400 bm = aij->B->rmap->n; 5401 ba = b->a; 5402 } 5403 } else col = in[j]; 5404 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5405 } 5406 } 5407 } else { 5408 if (!aij->donotstash) { 5409 if (roworiented) { 5410 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5411 } else { 5412 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5413 } 5414 } 5415 } 5416 }} 5417 PetscFunctionReturnVoid(); 5418 } 5419 EXTERN_C_END 5420 5421
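/*
   Usage sketch for MatCreateMPIAIJWithSplitArrays() (documented above).  Per the
   MatCreateSeqAIJWithArrays() calls in that routine, the "diagonal" triplet i,j,a uses column
   indices local to this process's block of columns, while the "off-diagonal" triplet oi,oj,oa
   uses global column indices; the arrays are used directly (not copied), so they must remain
   valid while the matrix exists.  The numbers below are only an illustration, not taken from any
   PETSc example: a 4x4 matrix over two processes, each owning two rows and two columns, with
   global entries

       row 0: (0,1.0) (1,2.0) (2,3.0)        row 2: (1,6.0) (2,7.0)
       row 1: (1,4.0) (2,5.0)                row 3: (2,8.0) (3,9.0)

   On rank 0 (rows 0-1, diagonal columns 0-1):
       PetscInt    i[]  = {0,2,3},  j[]  = {0,1,1};      diagonal block, local column indices
       PetscScalar a[]  = {1.0,2.0,4.0};
       PetscInt    oi[] = {0,1,2},  oj[] = {2,2};        off-diagonal block, global column indices
       PetscScalar oa[] = {3.0,5.0};
   On rank 1 (rows 2-3, diagonal columns 2-3):
       PetscInt    i[]  = {0,1,3},  j[]  = {0,0,1};
       PetscScalar a[]  = {7.0,8.0,9.0};
       PetscInt    oi[] = {0,1,1},  oj[] = {1};
       PetscScalar oa[] = {6.0};
   Each rank then calls
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                             i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
*/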