1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 7 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 8 { 9 PetscErrorCode ierr; 10 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 11 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 12 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 13 const PetscInt *ia,*ib; 14 const MatScalar *aa,*bb; 15 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 16 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 17 18 PetscFunctionBegin; 19 *keptrows = 0; 20 ia = a->i; 21 ib = b->i; 22 for (i=0; i<m; i++) { 23 na = ia[i+1] - ia[i]; 24 nb = ib[i+1] - ib[i]; 25 if (!na && !nb) { 26 cnt++; 27 goto ok1; 28 } 29 aa = a->a + ia[i]; 30 for (j=0; j<na; j++) { 31 if (aa[j] != 0.0) goto ok1; 32 } 33 bb = b->a + ib[i]; 34 for (j=0; j <nb; j++) { 35 if (bb[j] != 0.0) goto ok1; 36 } 37 cnt++; 38 ok1:; 39 } 40 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 41 if (!n0rows) PetscFunctionReturn(0); 42 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 43 cnt = 0; 44 for (i=0; i<m; i++) { 45 na = ia[i+1] - ia[i]; 46 nb = ib[i+1] - ib[i]; 47 if (!na && !nb) continue; 48 aa = a->a + ia[i]; 49 for(j=0; j<na;j++) { 50 if (aa[j] != 0.0) { 51 rows[cnt++] = rstart + i; 52 goto ok2; 53 } 54 } 55 bb = b->a + ib[i]; 56 for (j=0; j<nb; j++) { 57 if (bb[j] != 0.0) { 58 rows[cnt++] = rstart + i; 59 goto ok2; 60 } 61 } 62 ok2:; 63 } 64 ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 65 PetscFunctionReturn(0); 66 } 67 68 #undef __FUNCT__ 69 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 70 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 71 { 72 PetscErrorCode ierr; 73 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 74 PetscInt i,n,*garray = aij->garray; 75 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 76 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 77 PetscReal *work; 78 79 PetscFunctionBegin; 80 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 81 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 82 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 83 if (type == NORM_2) { 84 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 85 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 86 } 87 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 88 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 89 } 90 } else if (type == NORM_1) { 91 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 92 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 93 } 94 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 95 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 96 } 97 } else if (type == NORM_INFINITY) { 98 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 99 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 100 } 101 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 102 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 103 } 104 105 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 106 if (type == NORM_INFINITY) { 107 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 108 } else { 109 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 110 } 111 ierr = PetscFree(work);CHKERRQ(ierr); 112 if (type == NORM_2) { 113 for (i=0; i<n; i++) norms[i] = sqrt(norms[i]); 114 } 115 PetscFunctionReturn(0); 116 } 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatDistribute_MPIAIJ" 120 /* 121 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 122 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 123 124 Only for square matrices 125 */ 126 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 127 { 128 PetscMPIInt rank,size; 129 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 130 PetscErrorCode ierr; 131 Mat mat; 132 Mat_SeqAIJ *gmata; 133 PetscMPIInt tag; 134 MPI_Status status; 135 PetscBool aij; 136 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 137 138 PetscFunctionBegin; 139 CHKMEMQ; 140 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 141 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 142 if (!rank) { 143 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 144 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 145 } 146 if (reuse == MAT_INITIAL_MATRIX) { 147 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 148 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 149 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 150 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 151 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 152 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 153 rowners[0] = 0; 154 for (i=2; i<=size; i++) { 155 rowners[i] += rowners[i-1]; 156 } 157 rstart = rowners[rank]; 158 rend = rowners[rank+1]; 159 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 160 if (!rank) { 161 gmata = (Mat_SeqAIJ*) gmat->data; 162 /* send row lengths to all processors */ 163 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 164 for (i=1; i<size; i++) { 165 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 166 } 167 /* determine number diagonal and off-diagonal counts */ 168 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 169 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 170 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 171 jj = 0; 172 for (i=0; i<m; i++) { 173 for (j=0; j<dlens[i]; j++) { 174 if (gmata->j[jj] < rstart) ld[i]++; 175 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 176 jj++; 177 } 178 } 179 /* send column indices to other processes */ 180 for (i=1; i<size; i++) { 181 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 182 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 183 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 184 } 185 186 /* send numerical values to other processes */ 187 for (i=1; i<size; i++) { 188 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 189 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 190 } 191 gmataa = gmata->a; 192 gmataj = gmata->j; 193 194 } else { 195 /* receive row lengths */ 196 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 197 /* receive column indices */ 198 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 199 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 200 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 201 /* determine number diagonal and off-diagonal counts */ 202 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 203 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 204 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 205 jj = 0; 206 for (i=0; i<m; i++) { 207 for (j=0; j<dlens[i]; j++) { 208 if (gmataj[jj] < rstart) ld[i]++; 209 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 210 jj++; 211 } 212 } 213 /* receive numerical values */ 214 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 215 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 216 } 217 /* set preallocation */ 218 for (i=0; i<m; i++) { 219 dlens[i] -= olens[i]; 220 } 221 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 222 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 223 224 for (i=0; i<m; i++) { 225 dlens[i] += olens[i]; 226 } 227 cnt = 0; 228 for (i=0; i<m; i++) { 229 row = rstart + i; 230 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 231 cnt += dlens[i]; 232 } 233 if (rank) { 234 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 235 } 236 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 237 ierr = PetscFree(rowners);CHKERRQ(ierr); 238 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 239 *inmat = mat; 240 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 241 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 242 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 243 mat = *inmat; 244 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 245 if (!rank) { 246 /* send numerical values to other processes */ 247 gmata = (Mat_SeqAIJ*) gmat->data; 248 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 249 gmataa = gmata->a; 250 for (i=1; i<size; i++) { 251 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 252 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 253 } 254 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 255 } else { 256 /* receive numerical values from process 0*/ 257 nz = Ad->nz + Ao->nz; 258 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 259 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 260 } 261 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 262 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 263 ad = Ad->a; 264 ao = Ao->a; 265 if (mat->rmap->n) { 266 i = 0; 267 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 268 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 269 } 270 for (i=1; i<mat->rmap->n; i++) { 271 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 272 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 273 } 274 i--; 275 if (mat->rmap->n) { 276 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 277 } 278 if (rank) { 279 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 280 } 281 } 282 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 283 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 284 CHKMEMQ; 285 PetscFunctionReturn(0); 286 } 287 288 /* 289 Local utility routine that creates a mapping from the global column 290 number to the local number in the off-diagonal part of the local 291 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 292 a slightly higher hash table cost; without it it is not scalable (each processor 293 has an order N integer array but is fast to acess. 294 */ 295 #undef __FUNCT__ 296 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 297 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 298 { 299 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 300 PetscErrorCode ierr; 301 PetscInt n = aij->B->cmap->n,i; 302 303 PetscFunctionBegin; 304 #if defined (PETSC_USE_CTABLE) 305 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 306 for (i=0; i<n; i++){ 307 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 308 } 309 #else 310 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 311 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 312 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 313 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 314 #endif 315 PetscFunctionReturn(0); 316 } 317 318 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 319 { \ 320 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 321 lastcol1 = col;\ 322 while (high1-low1 > 5) { \ 323 t = (low1+high1)/2; \ 324 if (rp1[t] > col) high1 = t; \ 325 else low1 = t; \ 326 } \ 327 for (_i=low1; _i<high1; _i++) { \ 328 if (rp1[_i] > col) break; \ 329 if (rp1[_i] == col) { \ 330 if (addv == ADD_VALUES) ap1[_i] += value; \ 331 else ap1[_i] = value; \ 332 goto a_noinsert; \ 333 } \ 334 } \ 335 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 336 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 337 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 338 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 339 N = nrow1++ - 1; a->nz++; high1++; \ 340 /* shift up all the later entries in this row */ \ 341 for (ii=N; ii>=_i; ii--) { \ 342 rp1[ii+1] = rp1[ii]; \ 343 ap1[ii+1] = ap1[ii]; \ 344 } \ 345 rp1[_i] = col; \ 346 ap1[_i] = value; \ 347 a_noinsert: ; \ 348 ailen[row] = nrow1; \ 349 } 350 351 352 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 353 { \ 354 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 355 lastcol2 = col;\ 356 while (high2-low2 > 5) { \ 357 t = (low2+high2)/2; \ 358 if (rp2[t] > col) high2 = t; \ 359 else low2 = t; \ 360 } \ 361 for (_i=low2; _i<high2; _i++) { \ 362 if (rp2[_i] > col) break; \ 363 if (rp2[_i] == col) { \ 364 if (addv == ADD_VALUES) ap2[_i] += value; \ 365 else ap2[_i] = value; \ 366 goto b_noinsert; \ 367 } \ 368 } \ 369 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 370 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 371 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 372 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 373 N = nrow2++ - 1; b->nz++; high2++; \ 374 /* shift up all the later entries in this row */ \ 375 for (ii=N; ii>=_i; ii--) { \ 376 rp2[ii+1] = rp2[ii]; \ 377 ap2[ii+1] = ap2[ii]; \ 378 } \ 379 rp2[_i] = col; \ 380 ap2[_i] = value; \ 381 b_noinsert: ; \ 382 bilen[row] = nrow2; \ 383 } 384 385 #undef __FUNCT__ 386 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 387 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 388 { 389 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 390 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 391 PetscErrorCode ierr; 392 PetscInt l,*garray = mat->garray,diag; 393 394 PetscFunctionBegin; 395 /* code only works for square matrices A */ 396 397 /* find size of row to the left of the diagonal part */ 398 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 399 row = row - diag; 400 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 401 if (garray[b->j[b->i[row]+l]] > diag) break; 402 } 403 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 404 405 /* diagonal part */ 406 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 407 408 /* right of diagonal part */ 409 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "MatSetValues_MPIAIJ" 415 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 416 { 417 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 418 PetscScalar value; 419 PetscErrorCode ierr; 420 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 421 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 422 PetscBool roworiented = aij->roworiented; 423 424 /* Some Variables required in the macro */ 425 Mat A = aij->A; 426 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 427 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 428 MatScalar *aa = a->a; 429 PetscBool ignorezeroentries = a->ignorezeroentries; 430 Mat B = aij->B; 431 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 432 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 433 MatScalar *ba = b->a; 434 435 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 436 PetscInt nonew = a->nonew; 437 MatScalar *ap1,*ap2; 438 439 PetscFunctionBegin; 440 if (v) PetscValidScalarPointer(v,6); 441 for (i=0; i<m; i++) { 442 if (im[i] < 0) continue; 443 #if defined(PETSC_USE_DEBUG) 444 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 445 #endif 446 if (im[i] >= rstart && im[i] < rend) { 447 row = im[i] - rstart; 448 lastcol1 = -1; 449 rp1 = aj + ai[row]; 450 ap1 = aa + ai[row]; 451 rmax1 = aimax[row]; 452 nrow1 = ailen[row]; 453 low1 = 0; 454 high1 = nrow1; 455 lastcol2 = -1; 456 rp2 = bj + bi[row]; 457 ap2 = ba + bi[row]; 458 rmax2 = bimax[row]; 459 nrow2 = bilen[row]; 460 low2 = 0; 461 high2 = nrow2; 462 463 for (j=0; j<n; j++) { 464 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 465 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 466 if (in[j] >= cstart && in[j] < cend){ 467 col = in[j] - cstart; 468 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 469 } else if (in[j] < 0) continue; 470 #if defined(PETSC_USE_DEBUG) 471 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 472 #endif 473 else { 474 if (mat->was_assembled) { 475 if (!aij->colmap) { 476 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 477 } 478 #if defined (PETSC_USE_CTABLE) 479 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 480 col--; 481 #else 482 col = aij->colmap[in[j]] - 1; 483 #endif 484 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 485 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 486 col = in[j]; 487 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 488 B = aij->B; 489 b = (Mat_SeqAIJ*)B->data; 490 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 491 rp2 = bj + bi[row]; 492 ap2 = ba + bi[row]; 493 rmax2 = bimax[row]; 494 nrow2 = bilen[row]; 495 low2 = 0; 496 high2 = nrow2; 497 bm = aij->B->rmap->n; 498 ba = b->a; 499 } 500 } else col = in[j]; 501 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 502 } 503 } 504 } else { 505 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 506 if (!aij->donotstash) { 507 if (roworiented) { 508 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 509 } else { 510 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 511 } 512 } 513 } 514 } 515 PetscFunctionReturn(0); 516 } 517 518 #undef __FUNCT__ 519 #define __FUNCT__ "MatGetValues_MPIAIJ" 520 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 521 { 522 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 523 PetscErrorCode ierr; 524 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 525 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 526 527 PetscFunctionBegin; 528 for (i=0; i<m; i++) { 529 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 530 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 531 if (idxm[i] >= rstart && idxm[i] < rend) { 532 row = idxm[i] - rstart; 533 for (j=0; j<n; j++) { 534 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 535 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 536 if (idxn[j] >= cstart && idxn[j] < cend){ 537 col = idxn[j] - cstart; 538 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 539 } else { 540 if (!aij->colmap) { 541 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 542 } 543 #if defined (PETSC_USE_CTABLE) 544 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 545 col --; 546 #else 547 col = aij->colmap[idxn[j]] - 1; 548 #endif 549 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 550 else { 551 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 552 } 553 } 554 } 555 } else { 556 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 557 } 558 } 559 PetscFunctionReturn(0); 560 } 561 562 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 566 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 567 { 568 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 569 PetscErrorCode ierr; 570 PetscInt nstash,reallocs; 571 InsertMode addv; 572 573 PetscFunctionBegin; 574 if (aij->donotstash || mat->nooffprocentries) { 575 PetscFunctionReturn(0); 576 } 577 578 /* make sure all processors are either in INSERTMODE or ADDMODE */ 579 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 580 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 581 mat->insertmode = addv; /* in case this processor had no cache */ 582 583 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 584 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 585 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 591 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 592 { 593 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 594 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 595 PetscErrorCode ierr; 596 PetscMPIInt n; 597 PetscInt i,j,rstart,ncols,flg; 598 PetscInt *row,*col; 599 PetscBool other_disassembled; 600 PetscScalar *val; 601 InsertMode addv = mat->insertmode; 602 603 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 604 PetscFunctionBegin; 605 if (!aij->donotstash && !mat->nooffprocentries) { 606 while (1) { 607 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 608 if (!flg) break; 609 610 for (i=0; i<n;) { 611 /* Now identify the consecutive vals belonging to the same row */ 612 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 613 if (j < n) ncols = j-i; 614 else ncols = n-i; 615 /* Now assemble all these values with a single function call */ 616 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 617 i = j; 618 } 619 } 620 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 621 } 622 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 623 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 624 625 /* determine if any processor has disassembled, if so we must 626 also disassemble ourselfs, in order that we may reassemble. */ 627 /* 628 if nonzero structure of submatrix B cannot change then we know that 629 no processor disassembled thus we can skip this stuff 630 */ 631 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 632 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 633 if (mat->was_assembled && !other_disassembled) { 634 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 635 } 636 } 637 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 638 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 639 } 640 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 641 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 642 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 643 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 644 645 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 646 aij->rowvalues = 0; 647 648 /* used by MatAXPY() */ 649 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 650 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 651 652 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 653 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 659 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 660 { 661 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 662 PetscErrorCode ierr; 663 664 PetscFunctionBegin; 665 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 666 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 667 PetscFunctionReturn(0); 668 } 669 670 #undef __FUNCT__ 671 #define __FUNCT__ "MatZeroRows_MPIAIJ" 672 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 673 { 674 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 675 PetscErrorCode ierr; 676 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 677 PetscInt i,*owners = A->rmap->range; 678 PetscInt *nprocs,j,idx,nsends,row; 679 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 680 PetscInt *rvalues,count,base,slen,*source; 681 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 682 MPI_Comm comm = ((PetscObject)A)->comm; 683 MPI_Request *send_waits,*recv_waits; 684 MPI_Status recv_status,*send_status; 685 const PetscScalar *xx; 686 PetscScalar *bb; 687 #if defined(PETSC_DEBUG) 688 PetscBool found = PETSC_FALSE; 689 #endif 690 691 PetscFunctionBegin; 692 /* first count number of contributors to each processor */ 693 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 694 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 695 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 696 j = 0; 697 for (i=0; i<N; i++) { 698 if (lastidx > (idx = rows[i])) j = 0; 699 lastidx = idx; 700 for (; j<size; j++) { 701 if (idx >= owners[j] && idx < owners[j+1]) { 702 nprocs[2*j]++; 703 nprocs[2*j+1] = 1; 704 owner[i] = j; 705 #if defined(PETSC_DEBUG) 706 found = PETSC_TRUE; 707 #endif 708 break; 709 } 710 } 711 #if defined(PETSC_DEBUG) 712 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 713 found = PETSC_FALSE; 714 #endif 715 } 716 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 717 718 if (A->nooffproczerorows) { 719 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 720 nrecvs = nsends; 721 nmax = N; 722 } else { 723 /* inform other processors of number of messages and max length*/ 724 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 725 } 726 727 /* post receives: */ 728 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 729 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 730 for (i=0; i<nrecvs; i++) { 731 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 732 } 733 734 /* do sends: 735 1) starts[i] gives the starting index in svalues for stuff going to 736 the ith processor 737 */ 738 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 739 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 740 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 741 starts[0] = 0; 742 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 743 for (i=0; i<N; i++) { 744 svalues[starts[owner[i]]++] = rows[i]; 745 } 746 747 starts[0] = 0; 748 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 749 count = 0; 750 for (i=0; i<size; i++) { 751 if (nprocs[2*i+1]) { 752 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 753 } 754 } 755 ierr = PetscFree(starts);CHKERRQ(ierr); 756 757 base = owners[rank]; 758 759 /* wait on receives */ 760 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 761 count = nrecvs; slen = 0; 762 while (count) { 763 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 764 /* unpack receives into our local space */ 765 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 766 source[imdex] = recv_status.MPI_SOURCE; 767 lens[imdex] = n; 768 slen += n; 769 count--; 770 } 771 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 772 773 /* move the data into the send scatter */ 774 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 775 count = 0; 776 for (i=0; i<nrecvs; i++) { 777 values = rvalues + i*nmax; 778 for (j=0; j<lens[i]; j++) { 779 lrows[count++] = values[j] - base; 780 } 781 } 782 ierr = PetscFree(rvalues);CHKERRQ(ierr); 783 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 784 ierr = PetscFree(owner);CHKERRQ(ierr); 785 ierr = PetscFree(nprocs);CHKERRQ(ierr); 786 787 /* fix right hand side if needed */ 788 if (x && b) { 789 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 790 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 791 for (i=0; i<slen; i++) { 792 bb[lrows[i]] = diag*xx[lrows[i]]; 793 } 794 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 795 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 796 } 797 /* 798 Zero the required rows. If the "diagonal block" of the matrix 799 is square and the user wishes to set the diagonal we use separate 800 code so that MatSetValues() is not called for each diagonal allocating 801 new memory, thus calling lots of mallocs and slowing things down. 802 803 */ 804 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 805 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 806 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 807 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 808 } else if (diag != 0.0) { 809 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 810 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 811 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 812 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 813 } 814 for (i = 0; i < slen; i++) { 815 row = lrows[i] + rstart; 816 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 817 } 818 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 819 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 820 } else { 821 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 822 } 823 ierr = PetscFree(lrows);CHKERRQ(ierr); 824 825 /* wait on sends */ 826 if (nsends) { 827 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 828 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 829 ierr = PetscFree(send_status);CHKERRQ(ierr); 830 } 831 ierr = PetscFree(send_waits);CHKERRQ(ierr); 832 ierr = PetscFree(svalues);CHKERRQ(ierr); 833 834 PetscFunctionReturn(0); 835 } 836 837 #undef __FUNCT__ 838 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 839 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 840 { 841 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 842 PetscErrorCode ierr; 843 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 844 PetscInt i,*owners = A->rmap->range; 845 PetscInt *nprocs,j,idx,nsends; 846 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 847 PetscInt *rvalues,count,base,slen,*source; 848 PetscInt *lens,*lrows,*values,m; 849 MPI_Comm comm = ((PetscObject)A)->comm; 850 MPI_Request *send_waits,*recv_waits; 851 MPI_Status recv_status,*send_status; 852 const PetscScalar *xx; 853 PetscScalar *bb,*mask; 854 Vec xmask,lmask; 855 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 856 const PetscInt *aj, *ii,*ridx; 857 PetscScalar *aa; 858 #if defined(PETSC_DEBUG) 859 PetscBool found = PETSC_FALSE; 860 #endif 861 862 PetscFunctionBegin; 863 /* first count number of contributors to each processor */ 864 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 865 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 866 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 867 j = 0; 868 for (i=0; i<N; i++) { 869 if (lastidx > (idx = rows[i])) j = 0; 870 lastidx = idx; 871 for (; j<size; j++) { 872 if (idx >= owners[j] && idx < owners[j+1]) { 873 nprocs[2*j]++; 874 nprocs[2*j+1] = 1; 875 owner[i] = j; 876 #if defined(PETSC_DEBUG) 877 found = PETSC_TRUE; 878 #endif 879 break; 880 } 881 } 882 #if defined(PETSC_DEBUG) 883 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 884 found = PETSC_FALSE; 885 #endif 886 } 887 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 888 889 /* inform other processors of number of messages and max length*/ 890 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 891 892 /* post receives: */ 893 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 894 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 895 for (i=0; i<nrecvs; i++) { 896 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 897 } 898 899 /* do sends: 900 1) starts[i] gives the starting index in svalues for stuff going to 901 the ith processor 902 */ 903 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 904 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 905 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 906 starts[0] = 0; 907 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 908 for (i=0; i<N; i++) { 909 svalues[starts[owner[i]]++] = rows[i]; 910 } 911 912 starts[0] = 0; 913 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 914 count = 0; 915 for (i=0; i<size; i++) { 916 if (nprocs[2*i+1]) { 917 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 918 } 919 } 920 ierr = PetscFree(starts);CHKERRQ(ierr); 921 922 base = owners[rank]; 923 924 /* wait on receives */ 925 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 926 count = nrecvs; slen = 0; 927 while (count) { 928 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 929 /* unpack receives into our local space */ 930 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 931 source[imdex] = recv_status.MPI_SOURCE; 932 lens[imdex] = n; 933 slen += n; 934 count--; 935 } 936 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 937 938 /* move the data into the send scatter */ 939 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 940 count = 0; 941 for (i=0; i<nrecvs; i++) { 942 values = rvalues + i*nmax; 943 for (j=0; j<lens[i]; j++) { 944 lrows[count++] = values[j] - base; 945 } 946 } 947 ierr = PetscFree(rvalues);CHKERRQ(ierr); 948 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 949 ierr = PetscFree(owner);CHKERRQ(ierr); 950 ierr = PetscFree(nprocs);CHKERRQ(ierr); 951 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 952 953 /* zero diagonal part of matrix */ 954 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 955 956 /* handle off diagonal part of matrix */ 957 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 958 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 959 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 960 for (i=0; i<slen; i++) { 961 bb[lrows[i]] = 1; 962 } 963 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 964 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 965 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 966 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 967 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 968 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 969 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 970 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 971 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 972 973 /* remove zeroed rows of off diagonal matrix */ 974 ii = aij->i; 975 for (i=0; i<slen; i++) { 976 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 977 } 978 979 /* loop over all elements of off process part of matrix zeroing removed columns*/ 980 if (aij->compressedrow.use){ 981 m = aij->compressedrow.nrows; 982 ii = aij->compressedrow.i; 983 ridx = aij->compressedrow.rindex; 984 for (i=0; i<m; i++){ 985 n = ii[i+1] - ii[i]; 986 aj = aij->j + ii[i]; 987 aa = aij->a + ii[i]; 988 989 for (j=0; j<n; j++) { 990 if (PetscAbsScalar(mask[*aj])) { 991 bb[*ridx] -= *aa*xx[*aj]; 992 *aa = 0.0; 993 } 994 aa++; 995 aj++; 996 } 997 ridx++; 998 } 999 } else { /* do not use compressed row format */ 1000 m = l->B->rmap->n; 1001 for (i=0; i<m; i++) { 1002 n = ii[i+1] - ii[i]; 1003 aj = aij->j + ii[i]; 1004 aa = aij->a + ii[i]; 1005 for (j=0; j<n; j++) { 1006 if (PetscAbsScalar(mask[*aj])) { 1007 bb[i] -= *aa*xx[*aj]; 1008 *aa = 0.0; 1009 } 1010 aa++; 1011 aj++; 1012 } 1013 } 1014 } 1015 1016 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1017 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1018 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1019 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1020 ierr = PetscFree(lrows);CHKERRQ(ierr); 1021 1022 /* wait on sends */ 1023 if (nsends) { 1024 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1025 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1026 ierr = PetscFree(send_status);CHKERRQ(ierr); 1027 } 1028 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1029 ierr = PetscFree(svalues);CHKERRQ(ierr); 1030 1031 PetscFunctionReturn(0); 1032 } 1033 1034 #undef __FUNCT__ 1035 #define __FUNCT__ "MatMult_MPIAIJ" 1036 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1037 { 1038 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1039 PetscErrorCode ierr; 1040 PetscInt nt; 1041 1042 PetscFunctionBegin; 1043 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1044 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1045 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1046 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1047 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1048 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 #undef __FUNCT__ 1053 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1054 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1055 { 1056 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1057 PetscErrorCode ierr; 1058 1059 PetscFunctionBegin; 1060 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1061 PetscFunctionReturn(0); 1062 } 1063 1064 #undef __FUNCT__ 1065 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1066 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1067 { 1068 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1069 PetscErrorCode ierr; 1070 1071 PetscFunctionBegin; 1072 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1073 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1074 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1075 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1081 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1082 { 1083 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1084 PetscErrorCode ierr; 1085 PetscBool merged; 1086 1087 PetscFunctionBegin; 1088 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1089 /* do nondiagonal part */ 1090 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1091 if (!merged) { 1092 /* send it on its way */ 1093 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1094 /* do local part */ 1095 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1096 /* receive remote parts: note this assumes the values are not actually */ 1097 /* added in yy until the next line, */ 1098 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1099 } else { 1100 /* do local part */ 1101 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1102 /* send it on its way */ 1103 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1104 /* values actually were received in the Begin() but we need to call this nop */ 1105 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1106 } 1107 PetscFunctionReturn(0); 1108 } 1109 1110 EXTERN_C_BEGIN 1111 #undef __FUNCT__ 1112 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1113 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1114 { 1115 MPI_Comm comm; 1116 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1117 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1118 IS Me,Notme; 1119 PetscErrorCode ierr; 1120 PetscInt M,N,first,last,*notme,i; 1121 PetscMPIInt size; 1122 1123 PetscFunctionBegin; 1124 1125 /* Easy test: symmetric diagonal block */ 1126 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1127 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1128 if (!*f) PetscFunctionReturn(0); 1129 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1130 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1131 if (size == 1) PetscFunctionReturn(0); 1132 1133 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1134 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1135 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1136 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1137 for (i=0; i<first; i++) notme[i] = i; 1138 for (i=last; i<M; i++) notme[i-last+first] = i; 1139 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1140 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1141 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1142 Aoff = Aoffs[0]; 1143 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1144 Boff = Boffs[0]; 1145 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1146 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1147 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1148 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1149 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1150 ierr = PetscFree(notme);CHKERRQ(ierr); 1151 PetscFunctionReturn(0); 1152 } 1153 EXTERN_C_END 1154 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1157 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1158 { 1159 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1160 PetscErrorCode ierr; 1161 1162 PetscFunctionBegin; 1163 /* do nondiagonal part */ 1164 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1165 /* send it on its way */ 1166 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1167 /* do local part */ 1168 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1169 /* receive remote parts */ 1170 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 /* 1175 This only works correctly for square matrices where the subblock A->A is the 1176 diagonal block 1177 */ 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1180 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1181 { 1182 PetscErrorCode ierr; 1183 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1184 1185 PetscFunctionBegin; 1186 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1187 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1188 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "MatScale_MPIAIJ" 1194 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1195 { 1196 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1197 PetscErrorCode ierr; 1198 1199 PetscFunctionBegin; 1200 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1201 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatDestroy_MPIAIJ" 1207 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1208 { 1209 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1210 PetscErrorCode ierr; 1211 1212 PetscFunctionBegin; 1213 #if defined(PETSC_USE_LOG) 1214 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1215 #endif 1216 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1217 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1218 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1219 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1220 #if defined (PETSC_USE_CTABLE) 1221 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1222 #else 1223 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1224 #endif 1225 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1226 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1227 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1228 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1229 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1230 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1231 1232 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1233 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1235 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1236 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1237 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1238 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1239 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1240 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1246 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1247 { 1248 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1249 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1250 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1251 PetscErrorCode ierr; 1252 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1253 int fd; 1254 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1255 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1256 PetscScalar *column_values; 1257 PetscInt message_count,flowcontrolcount; 1258 1259 PetscFunctionBegin; 1260 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1261 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1262 nz = A->nz + B->nz; 1263 if (!rank) { 1264 header[0] = MAT_FILE_CLASSID; 1265 header[1] = mat->rmap->N; 1266 header[2] = mat->cmap->N; 1267 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1268 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1269 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1270 /* get largest number of rows any processor has */ 1271 rlen = mat->rmap->n; 1272 range = mat->rmap->range; 1273 for (i=1; i<size; i++) { 1274 rlen = PetscMax(rlen,range[i+1] - range[i]); 1275 } 1276 } else { 1277 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1278 rlen = mat->rmap->n; 1279 } 1280 1281 /* load up the local row counts */ 1282 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1283 for (i=0; i<mat->rmap->n; i++) { 1284 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1285 } 1286 1287 /* store the row lengths to the file */ 1288 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1289 if (!rank) { 1290 MPI_Status status; 1291 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1292 for (i=1; i<size; i++) { 1293 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1294 rlen = range[i+1] - range[i]; 1295 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1296 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1297 } 1298 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1299 } else { 1300 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1301 ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1302 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1303 } 1304 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1305 1306 /* load up the local column indices */ 1307 nzmax = nz; /* )th processor needs space a largest processor needs */ 1308 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1309 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1310 cnt = 0; 1311 for (i=0; i<mat->rmap->n; i++) { 1312 for (j=B->i[i]; j<B->i[i+1]; j++) { 1313 if ( (col = garray[B->j[j]]) > cstart) break; 1314 column_indices[cnt++] = col; 1315 } 1316 for (k=A->i[i]; k<A->i[i+1]; k++) { 1317 column_indices[cnt++] = A->j[k] + cstart; 1318 } 1319 for (; j<B->i[i+1]; j++) { 1320 column_indices[cnt++] = garray[B->j[j]]; 1321 } 1322 } 1323 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1324 1325 /* store the column indices to the file */ 1326 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1327 if (!rank) { 1328 MPI_Status status; 1329 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1330 for (i=1; i<size; i++) { 1331 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1332 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1333 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1334 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1335 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1336 } 1337 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1338 } else { 1339 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1340 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1341 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1342 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1343 } 1344 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1345 1346 /* load up the local column values */ 1347 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1348 cnt = 0; 1349 for (i=0; i<mat->rmap->n; i++) { 1350 for (j=B->i[i]; j<B->i[i+1]; j++) { 1351 if ( garray[B->j[j]] > cstart) break; 1352 column_values[cnt++] = B->a[j]; 1353 } 1354 for (k=A->i[i]; k<A->i[i+1]; k++) { 1355 column_values[cnt++] = A->a[k]; 1356 } 1357 for (; j<B->i[i+1]; j++) { 1358 column_values[cnt++] = B->a[j]; 1359 } 1360 } 1361 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1362 1363 /* store the column values to the file */ 1364 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1365 if (!rank) { 1366 MPI_Status status; 1367 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1368 for (i=1; i<size; i++) { 1369 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1370 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1371 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1372 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1373 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1374 } 1375 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1376 } else { 1377 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1378 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1379 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1380 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1381 } 1382 ierr = PetscFree(column_values);CHKERRQ(ierr); 1383 PetscFunctionReturn(0); 1384 } 1385 1386 #undef __FUNCT__ 1387 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1388 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1389 { 1390 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1391 PetscErrorCode ierr; 1392 PetscMPIInt rank = aij->rank,size = aij->size; 1393 PetscBool isdraw,iascii,isbinary; 1394 PetscViewer sviewer; 1395 PetscViewerFormat format; 1396 1397 PetscFunctionBegin; 1398 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1399 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1400 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1401 if (iascii) { 1402 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1403 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1404 MatInfo info; 1405 PetscBool inodes; 1406 1407 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1408 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1409 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1410 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1411 if (!inodes) { 1412 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1413 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1414 } else { 1415 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1416 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1417 } 1418 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1420 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1421 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1422 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1423 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1424 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1425 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1428 PetscInt inodecount,inodelimit,*inodes; 1429 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1430 if (inodes) { 1431 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1432 } else { 1433 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1434 } 1435 PetscFunctionReturn(0); 1436 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1437 PetscFunctionReturn(0); 1438 } 1439 } else if (isbinary) { 1440 if (size == 1) { 1441 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1442 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1443 } else { 1444 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1445 } 1446 PetscFunctionReturn(0); 1447 } else if (isdraw) { 1448 PetscDraw draw; 1449 PetscBool isnull; 1450 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1451 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1452 } 1453 1454 if (size == 1) { 1455 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1456 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1457 } else { 1458 /* assemble the entire matrix onto first processor. */ 1459 Mat A; 1460 Mat_SeqAIJ *Aloc; 1461 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1462 MatScalar *a; 1463 1464 if (mat->rmap->N > 1024) { 1465 PetscBool flg = PETSC_FALSE; 1466 1467 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1468 if (!flg) { 1469 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1470 } 1471 } 1472 1473 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1474 if (!rank) { 1475 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1476 } else { 1477 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1478 } 1479 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1480 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1481 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1482 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1483 1484 /* copy over the A part */ 1485 Aloc = (Mat_SeqAIJ*)aij->A->data; 1486 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1487 row = mat->rmap->rstart; 1488 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1489 for (i=0; i<m; i++) { 1490 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1491 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1492 } 1493 aj = Aloc->j; 1494 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1495 1496 /* copy over the B part */ 1497 Aloc = (Mat_SeqAIJ*)aij->B->data; 1498 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1499 row = mat->rmap->rstart; 1500 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1501 ct = cols; 1502 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1503 for (i=0; i<m; i++) { 1504 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1505 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1506 } 1507 ierr = PetscFree(ct);CHKERRQ(ierr); 1508 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1509 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1510 /* 1511 Everyone has to call to draw the matrix since the graphics waits are 1512 synchronized across all processors that share the PetscDraw object 1513 */ 1514 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1515 if (!rank) { 1516 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1517 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1518 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1519 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1520 } 1521 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1522 ierr = MatDestroy(&A);CHKERRQ(ierr); 1523 } 1524 PetscFunctionReturn(0); 1525 } 1526 1527 #undef __FUNCT__ 1528 #define __FUNCT__ "MatView_MPIAIJ" 1529 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1530 { 1531 PetscErrorCode ierr; 1532 PetscBool iascii,isdraw,issocket,isbinary; 1533 1534 PetscFunctionBegin; 1535 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1536 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1537 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1538 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1539 if (iascii || isdraw || isbinary || issocket) { 1540 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1541 } else { 1542 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1543 } 1544 PetscFunctionReturn(0); 1545 } 1546 1547 #undef __FUNCT__ 1548 #define __FUNCT__ "MatSOR_MPIAIJ" 1549 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1550 { 1551 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1552 PetscErrorCode ierr; 1553 Vec bb1 = 0; 1554 PetscBool hasop; 1555 1556 PetscFunctionBegin; 1557 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1558 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1559 } 1560 1561 if (flag == SOR_APPLY_UPPER) { 1562 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1563 PetscFunctionReturn(0); 1564 } 1565 1566 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1567 if (flag & SOR_ZERO_INITIAL_GUESS) { 1568 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1569 its--; 1570 } 1571 1572 while (its--) { 1573 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1574 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1575 1576 /* update rhs: bb1 = bb - B*x */ 1577 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1578 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1579 1580 /* local sweep */ 1581 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1582 } 1583 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1584 if (flag & SOR_ZERO_INITIAL_GUESS) { 1585 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1586 its--; 1587 } 1588 while (its--) { 1589 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1590 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1591 1592 /* update rhs: bb1 = bb - B*x */ 1593 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1594 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1595 1596 /* local sweep */ 1597 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1598 } 1599 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1600 if (flag & SOR_ZERO_INITIAL_GUESS) { 1601 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1602 its--; 1603 } 1604 while (its--) { 1605 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1607 1608 /* update rhs: bb1 = bb - B*x */ 1609 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1610 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1611 1612 /* local sweep */ 1613 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1614 } 1615 } else if (flag & SOR_EISENSTAT) { 1616 Vec xx1; 1617 1618 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1619 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1620 1621 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1622 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1623 if (!mat->diag) { 1624 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1625 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1626 } 1627 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1628 if (hasop) { 1629 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1630 } else { 1631 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1632 } 1633 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1634 1635 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1636 1637 /* local sweep */ 1638 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1639 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1640 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1641 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1642 1643 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1644 PetscFunctionReturn(0); 1645 } 1646 1647 #undef __FUNCT__ 1648 #define __FUNCT__ "MatPermute_MPIAIJ" 1649 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1650 { 1651 MPI_Comm comm,pcomm; 1652 PetscInt first,local_size,nrows; 1653 const PetscInt *rows; 1654 PetscMPIInt size; 1655 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1656 PetscErrorCode ierr; 1657 1658 PetscFunctionBegin; 1659 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1660 /* make a collective version of 'rowp' */ 1661 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1662 if (pcomm==comm) { 1663 crowp = rowp; 1664 } else { 1665 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1666 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1667 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 1668 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1669 } 1670 /* collect the global row permutation and invert it */ 1671 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1672 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1673 if (pcomm!=comm) { 1674 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1675 } 1676 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1677 /* get the local target indices */ 1678 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1679 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1680 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1681 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1682 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1683 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1684 /* the column permutation is so much easier; 1685 make a local version of 'colp' and invert it */ 1686 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1687 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1688 if (size==1) { 1689 lcolp = colp; 1690 } else { 1691 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1692 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1693 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 1694 } 1695 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1696 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1697 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1698 if (size>1) { 1699 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1700 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1701 } 1702 /* now we just get the submatrix */ 1703 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1704 /* clean up */ 1705 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1706 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1707 PetscFunctionReturn(0); 1708 } 1709 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1712 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1713 { 1714 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1715 Mat A = mat->A,B = mat->B; 1716 PetscErrorCode ierr; 1717 PetscReal isend[5],irecv[5]; 1718 1719 PetscFunctionBegin; 1720 info->block_size = 1.0; 1721 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1722 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1723 isend[3] = info->memory; isend[4] = info->mallocs; 1724 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1725 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1726 isend[3] += info->memory; isend[4] += info->mallocs; 1727 if (flag == MAT_LOCAL) { 1728 info->nz_used = isend[0]; 1729 info->nz_allocated = isend[1]; 1730 info->nz_unneeded = isend[2]; 1731 info->memory = isend[3]; 1732 info->mallocs = isend[4]; 1733 } else if (flag == MAT_GLOBAL_MAX) { 1734 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1735 info->nz_used = irecv[0]; 1736 info->nz_allocated = irecv[1]; 1737 info->nz_unneeded = irecv[2]; 1738 info->memory = irecv[3]; 1739 info->mallocs = irecv[4]; 1740 } else if (flag == MAT_GLOBAL_SUM) { 1741 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1742 info->nz_used = irecv[0]; 1743 info->nz_allocated = irecv[1]; 1744 info->nz_unneeded = irecv[2]; 1745 info->memory = irecv[3]; 1746 info->mallocs = irecv[4]; 1747 } 1748 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1749 info->fill_ratio_needed = 0; 1750 info->factor_mallocs = 0; 1751 1752 PetscFunctionReturn(0); 1753 } 1754 1755 #undef __FUNCT__ 1756 #define __FUNCT__ "MatSetOption_MPIAIJ" 1757 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1758 { 1759 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1760 PetscErrorCode ierr; 1761 1762 PetscFunctionBegin; 1763 switch (op) { 1764 case MAT_NEW_NONZERO_LOCATIONS: 1765 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1766 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1767 case MAT_KEEP_NONZERO_PATTERN: 1768 case MAT_NEW_NONZERO_LOCATION_ERR: 1769 case MAT_USE_INODES: 1770 case MAT_IGNORE_ZERO_ENTRIES: 1771 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1772 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1773 break; 1774 case MAT_ROW_ORIENTED: 1775 a->roworiented = flg; 1776 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1777 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1778 break; 1779 case MAT_NEW_DIAGONALS: 1780 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1781 break; 1782 case MAT_IGNORE_OFF_PROC_ENTRIES: 1783 a->donotstash = PETSC_TRUE; 1784 break; 1785 case MAT_SPD: 1786 A->spd_set = PETSC_TRUE; 1787 A->spd = flg; 1788 if (flg) { 1789 A->symmetric = PETSC_TRUE; 1790 A->structurally_symmetric = PETSC_TRUE; 1791 A->symmetric_set = PETSC_TRUE; 1792 A->structurally_symmetric_set = PETSC_TRUE; 1793 } 1794 break; 1795 case MAT_SYMMETRIC: 1796 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1797 break; 1798 case MAT_STRUCTURALLY_SYMMETRIC: 1799 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1800 break; 1801 case MAT_HERMITIAN: 1802 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1803 break; 1804 case MAT_SYMMETRY_ETERNAL: 1805 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1806 break; 1807 default: 1808 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1809 } 1810 PetscFunctionReturn(0); 1811 } 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatGetRow_MPIAIJ" 1815 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1816 { 1817 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1818 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1819 PetscErrorCode ierr; 1820 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1821 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1822 PetscInt *cmap,*idx_p; 1823 1824 PetscFunctionBegin; 1825 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1826 mat->getrowactive = PETSC_TRUE; 1827 1828 if (!mat->rowvalues && (idx || v)) { 1829 /* 1830 allocate enough space to hold information from the longest row. 1831 */ 1832 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1833 PetscInt max = 1,tmp; 1834 for (i=0; i<matin->rmap->n; i++) { 1835 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1836 if (max < tmp) { max = tmp; } 1837 } 1838 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1839 } 1840 1841 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1842 lrow = row - rstart; 1843 1844 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1845 if (!v) {pvA = 0; pvB = 0;} 1846 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1847 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1848 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1849 nztot = nzA + nzB; 1850 1851 cmap = mat->garray; 1852 if (v || idx) { 1853 if (nztot) { 1854 /* Sort by increasing column numbers, assuming A and B already sorted */ 1855 PetscInt imark = -1; 1856 if (v) { 1857 *v = v_p = mat->rowvalues; 1858 for (i=0; i<nzB; i++) { 1859 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1860 else break; 1861 } 1862 imark = i; 1863 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1864 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1865 } 1866 if (idx) { 1867 *idx = idx_p = mat->rowindices; 1868 if (imark > -1) { 1869 for (i=0; i<imark; i++) { 1870 idx_p[i] = cmap[cworkB[i]]; 1871 } 1872 } else { 1873 for (i=0; i<nzB; i++) { 1874 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1875 else break; 1876 } 1877 imark = i; 1878 } 1879 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1880 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1881 } 1882 } else { 1883 if (idx) *idx = 0; 1884 if (v) *v = 0; 1885 } 1886 } 1887 *nz = nztot; 1888 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1889 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1895 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1896 { 1897 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1898 1899 PetscFunctionBegin; 1900 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1901 aij->getrowactive = PETSC_FALSE; 1902 PetscFunctionReturn(0); 1903 } 1904 1905 #undef __FUNCT__ 1906 #define __FUNCT__ "MatNorm_MPIAIJ" 1907 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1908 { 1909 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1910 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1911 PetscErrorCode ierr; 1912 PetscInt i,j,cstart = mat->cmap->rstart; 1913 PetscReal sum = 0.0; 1914 MatScalar *v; 1915 1916 PetscFunctionBegin; 1917 if (aij->size == 1) { 1918 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1919 } else { 1920 if (type == NORM_FROBENIUS) { 1921 v = amat->a; 1922 for (i=0; i<amat->nz; i++) { 1923 #if defined(PETSC_USE_COMPLEX) 1924 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1925 #else 1926 sum += (*v)*(*v); v++; 1927 #endif 1928 } 1929 v = bmat->a; 1930 for (i=0; i<bmat->nz; i++) { 1931 #if defined(PETSC_USE_COMPLEX) 1932 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1933 #else 1934 sum += (*v)*(*v); v++; 1935 #endif 1936 } 1937 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1938 *norm = sqrt(*norm); 1939 } else if (type == NORM_1) { /* max column norm */ 1940 PetscReal *tmp,*tmp2; 1941 PetscInt *jj,*garray = aij->garray; 1942 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1943 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1944 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1945 *norm = 0.0; 1946 v = amat->a; jj = amat->j; 1947 for (j=0; j<amat->nz; j++) { 1948 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1949 } 1950 v = bmat->a; jj = bmat->j; 1951 for (j=0; j<bmat->nz; j++) { 1952 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1953 } 1954 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1955 for (j=0; j<mat->cmap->N; j++) { 1956 if (tmp2[j] > *norm) *norm = tmp2[j]; 1957 } 1958 ierr = PetscFree(tmp);CHKERRQ(ierr); 1959 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1960 } else if (type == NORM_INFINITY) { /* max row norm */ 1961 PetscReal ntemp = 0.0; 1962 for (j=0; j<aij->A->rmap->n; j++) { 1963 v = amat->a + amat->i[j]; 1964 sum = 0.0; 1965 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1966 sum += PetscAbsScalar(*v); v++; 1967 } 1968 v = bmat->a + bmat->i[j]; 1969 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1970 sum += PetscAbsScalar(*v); v++; 1971 } 1972 if (sum > ntemp) ntemp = sum; 1973 } 1974 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1975 } else { 1976 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1977 } 1978 } 1979 PetscFunctionReturn(0); 1980 } 1981 1982 #undef __FUNCT__ 1983 #define __FUNCT__ "MatTranspose_MPIAIJ" 1984 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1985 { 1986 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1987 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1988 PetscErrorCode ierr; 1989 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 1990 PetscInt cstart=A->cmap->rstart,ncol; 1991 Mat B; 1992 MatScalar *array; 1993 1994 PetscFunctionBegin; 1995 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1996 1997 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 1998 ai = Aloc->i; aj = Aloc->j; 1999 bi = Bloc->i; bj = Bloc->j; 2000 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2001 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2002 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2003 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2004 for (i=0; i<ai[ma]; i++){ 2005 d_nnz[aj[i]] ++; 2006 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2007 } 2008 2009 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2010 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2011 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2012 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2013 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2014 } else { 2015 B = *matout; 2016 } 2017 2018 /* copy over the A part */ 2019 array = Aloc->a; 2020 row = A->rmap->rstart; 2021 for (i=0; i<ma; i++) { 2022 ncol = ai[i+1]-ai[i]; 2023 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2024 row++; array += ncol; aj += ncol; 2025 } 2026 aj = Aloc->j; 2027 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2028 2029 /* copy over the B part */ 2030 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2031 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2032 array = Bloc->a; 2033 row = A->rmap->rstart; 2034 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2035 cols_tmp = cols; 2036 for (i=0; i<mb; i++) { 2037 ncol = bi[i+1]-bi[i]; 2038 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2039 row++; array += ncol; cols_tmp += ncol; 2040 } 2041 ierr = PetscFree(cols);CHKERRQ(ierr); 2042 2043 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2044 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2045 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2046 *matout = B; 2047 } else { 2048 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2049 } 2050 PetscFunctionReturn(0); 2051 } 2052 2053 #undef __FUNCT__ 2054 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2055 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2056 { 2057 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2058 Mat a = aij->A,b = aij->B; 2059 PetscErrorCode ierr; 2060 PetscInt s1,s2,s3; 2061 2062 PetscFunctionBegin; 2063 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2064 if (rr) { 2065 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2066 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2067 /* Overlap communication with computation. */ 2068 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2069 } 2070 if (ll) { 2071 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2072 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2073 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2074 } 2075 /* scale the diagonal block */ 2076 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2077 2078 if (rr) { 2079 /* Do a scatter end and then right scale the off-diagonal block */ 2080 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2081 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2082 } 2083 2084 PetscFunctionReturn(0); 2085 } 2086 2087 #undef __FUNCT__ 2088 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2089 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2090 { 2091 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2092 PetscErrorCode ierr; 2093 2094 PetscFunctionBegin; 2095 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2096 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2097 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2098 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2099 PetscFunctionReturn(0); 2100 } 2101 #undef __FUNCT__ 2102 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2103 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2104 { 2105 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2106 PetscErrorCode ierr; 2107 2108 PetscFunctionBegin; 2109 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2110 PetscFunctionReturn(0); 2111 } 2112 2113 #undef __FUNCT__ 2114 #define __FUNCT__ "MatEqual_MPIAIJ" 2115 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2116 { 2117 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2118 Mat a,b,c,d; 2119 PetscBool flg; 2120 PetscErrorCode ierr; 2121 2122 PetscFunctionBegin; 2123 a = matA->A; b = matA->B; 2124 c = matB->A; d = matB->B; 2125 2126 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2127 if (flg) { 2128 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2129 } 2130 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2131 PetscFunctionReturn(0); 2132 } 2133 2134 #undef __FUNCT__ 2135 #define __FUNCT__ "MatCopy_MPIAIJ" 2136 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2137 { 2138 PetscErrorCode ierr; 2139 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2140 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2141 2142 PetscFunctionBegin; 2143 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2144 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2145 /* because of the column compression in the off-processor part of the matrix a->B, 2146 the number of columns in a->B and b->B may be different, hence we cannot call 2147 the MatCopy() directly on the two parts. If need be, we can provide a more 2148 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2149 then copying the submatrices */ 2150 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2151 } else { 2152 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2153 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2154 } 2155 PetscFunctionReturn(0); 2156 } 2157 2158 #undef __FUNCT__ 2159 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2160 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2161 { 2162 PetscErrorCode ierr; 2163 2164 PetscFunctionBegin; 2165 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2166 PetscFunctionReturn(0); 2167 } 2168 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatAXPY_MPIAIJ" 2171 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2172 { 2173 PetscErrorCode ierr; 2174 PetscInt i; 2175 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2176 PetscBLASInt bnz,one=1; 2177 Mat_SeqAIJ *x,*y; 2178 2179 PetscFunctionBegin; 2180 if (str == SAME_NONZERO_PATTERN) { 2181 PetscScalar alpha = a; 2182 x = (Mat_SeqAIJ *)xx->A->data; 2183 y = (Mat_SeqAIJ *)yy->A->data; 2184 bnz = PetscBLASIntCast(x->nz); 2185 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2186 x = (Mat_SeqAIJ *)xx->B->data; 2187 y = (Mat_SeqAIJ *)yy->B->data; 2188 bnz = PetscBLASIntCast(x->nz); 2189 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2190 } else if (str == SUBSET_NONZERO_PATTERN) { 2191 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2192 2193 x = (Mat_SeqAIJ *)xx->B->data; 2194 y = (Mat_SeqAIJ *)yy->B->data; 2195 if (y->xtoy && y->XtoY != xx->B) { 2196 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2197 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2198 } 2199 if (!y->xtoy) { /* get xtoy */ 2200 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2201 y->XtoY = xx->B; 2202 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2203 } 2204 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2205 } else { 2206 Mat B; 2207 PetscInt *nnz_d,*nnz_o; 2208 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2209 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2210 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2211 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2212 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2213 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2214 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2215 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2216 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 2217 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2218 ierr = MatHeaderReplace(Y,B); 2219 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2220 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2221 } 2222 PetscFunctionReturn(0); 2223 } 2224 2225 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2226 2227 #undef __FUNCT__ 2228 #define __FUNCT__ "MatConjugate_MPIAIJ" 2229 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2230 { 2231 #if defined(PETSC_USE_COMPLEX) 2232 PetscErrorCode ierr; 2233 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2234 2235 PetscFunctionBegin; 2236 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2237 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2238 #else 2239 PetscFunctionBegin; 2240 #endif 2241 PetscFunctionReturn(0); 2242 } 2243 2244 #undef __FUNCT__ 2245 #define __FUNCT__ "MatRealPart_MPIAIJ" 2246 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2247 { 2248 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2249 PetscErrorCode ierr; 2250 2251 PetscFunctionBegin; 2252 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2253 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2259 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2260 { 2261 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2262 PetscErrorCode ierr; 2263 2264 PetscFunctionBegin; 2265 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2266 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2267 PetscFunctionReturn(0); 2268 } 2269 2270 #ifdef PETSC_HAVE_PBGL 2271 2272 #include <boost/parallel/mpi/bsp_process_group.hpp> 2273 #include <boost/graph/distributed/ilu_default_graph.hpp> 2274 #include <boost/graph/distributed/ilu_0_block.hpp> 2275 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2276 #include <boost/graph/distributed/petsc/interface.hpp> 2277 #include <boost/multi_array.hpp> 2278 #include <boost/parallel/distributed_property_map->hpp> 2279 2280 #undef __FUNCT__ 2281 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2282 /* 2283 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2284 */ 2285 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2286 { 2287 namespace petsc = boost::distributed::petsc; 2288 2289 namespace graph_dist = boost::graph::distributed; 2290 using boost::graph::distributed::ilu_default::process_group_type; 2291 using boost::graph::ilu_permuted; 2292 2293 PetscBool row_identity, col_identity; 2294 PetscContainer c; 2295 PetscInt m, n, M, N; 2296 PetscErrorCode ierr; 2297 2298 PetscFunctionBegin; 2299 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2300 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2301 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2302 if (!row_identity || !col_identity) { 2303 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2304 } 2305 2306 process_group_type pg; 2307 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2308 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2309 lgraph_type& level_graph = *lgraph_p; 2310 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2311 2312 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2313 ilu_permuted(level_graph); 2314 2315 /* put together the new matrix */ 2316 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2317 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2318 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2319 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2320 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2321 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2322 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2323 2324 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2325 ierr = PetscContainerSetPointer(c, lgraph_p); 2326 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2327 ierr = PetscContainerDestroy(&c); 2328 PetscFunctionReturn(0); 2329 } 2330 2331 #undef __FUNCT__ 2332 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2333 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2334 { 2335 PetscFunctionBegin; 2336 PetscFunctionReturn(0); 2337 } 2338 2339 #undef __FUNCT__ 2340 #define __FUNCT__ "MatSolve_MPIAIJ" 2341 /* 2342 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2343 */ 2344 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2345 { 2346 namespace graph_dist = boost::graph::distributed; 2347 2348 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2349 lgraph_type* lgraph_p; 2350 PetscContainer c; 2351 PetscErrorCode ierr; 2352 2353 PetscFunctionBegin; 2354 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2355 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2356 ierr = VecCopy(b, x);CHKERRQ(ierr); 2357 2358 PetscScalar* array_x; 2359 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2360 PetscInt sx; 2361 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2362 2363 PetscScalar* array_b; 2364 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2365 PetscInt sb; 2366 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2367 2368 lgraph_type& level_graph = *lgraph_p; 2369 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2370 2371 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2372 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2373 ref_x(array_x, boost::extents[num_vertices(graph)]); 2374 2375 typedef boost::iterator_property_map<array_ref_type::iterator, 2376 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2377 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2378 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2379 2380 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2381 2382 PetscFunctionReturn(0); 2383 } 2384 #endif 2385 2386 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2387 PetscInt nzlocal,nsends,nrecvs; 2388 PetscMPIInt *send_rank,*recv_rank; 2389 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2390 PetscScalar *sbuf_a,**rbuf_a; 2391 PetscErrorCode (*Destroy)(Mat); 2392 } Mat_Redundant; 2393 2394 #undef __FUNCT__ 2395 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2396 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2397 { 2398 PetscErrorCode ierr; 2399 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2400 PetscInt i; 2401 2402 PetscFunctionBegin; 2403 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2404 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2405 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2406 for (i=0; i<redund->nrecvs; i++){ 2407 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2408 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2409 } 2410 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2411 ierr = PetscFree(redund);CHKERRQ(ierr); 2412 PetscFunctionReturn(0); 2413 } 2414 2415 #undef __FUNCT__ 2416 #define __FUNCT__ "MatDestroy_MatRedundant" 2417 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2418 { 2419 PetscErrorCode ierr; 2420 PetscContainer container; 2421 Mat_Redundant *redund=PETSC_NULL; 2422 2423 PetscFunctionBegin; 2424 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2425 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2426 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2427 A->ops->destroy = redund->Destroy; 2428 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2429 if (A->ops->destroy) { 2430 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2431 } 2432 PetscFunctionReturn(0); 2433 } 2434 2435 #undef __FUNCT__ 2436 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2437 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2438 { 2439 PetscMPIInt rank,size; 2440 MPI_Comm comm=((PetscObject)mat)->comm; 2441 PetscErrorCode ierr; 2442 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2443 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2444 PetscInt *rowrange=mat->rmap->range; 2445 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2446 Mat A=aij->A,B=aij->B,C=*matredundant; 2447 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2448 PetscScalar *sbuf_a; 2449 PetscInt nzlocal=a->nz+b->nz; 2450 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2451 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2452 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2453 MatScalar *aworkA,*aworkB; 2454 PetscScalar *vals; 2455 PetscMPIInt tag1,tag2,tag3,imdex; 2456 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2457 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2458 MPI_Status recv_status,*send_status; 2459 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2460 PetscInt **rbuf_j=PETSC_NULL; 2461 PetscScalar **rbuf_a=PETSC_NULL; 2462 Mat_Redundant *redund=PETSC_NULL; 2463 PetscContainer container; 2464 2465 PetscFunctionBegin; 2466 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2467 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2468 2469 if (reuse == MAT_REUSE_MATRIX) { 2470 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2471 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2472 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2473 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2474 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2475 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2476 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2477 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2478 2479 nsends = redund->nsends; 2480 nrecvs = redund->nrecvs; 2481 send_rank = redund->send_rank; 2482 recv_rank = redund->recv_rank; 2483 sbuf_nz = redund->sbuf_nz; 2484 rbuf_nz = redund->rbuf_nz; 2485 sbuf_j = redund->sbuf_j; 2486 sbuf_a = redund->sbuf_a; 2487 rbuf_j = redund->rbuf_j; 2488 rbuf_a = redund->rbuf_a; 2489 } 2490 2491 if (reuse == MAT_INITIAL_MATRIX){ 2492 PetscMPIInt subrank,subsize; 2493 PetscInt nleftover,np_subcomm; 2494 /* get the destination processors' id send_rank, nsends and nrecvs */ 2495 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2496 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2497 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2498 np_subcomm = size/nsubcomm; 2499 nleftover = size - nsubcomm*np_subcomm; 2500 nsends = 0; nrecvs = 0; 2501 for (i=0; i<size; i++){ /* i=rank*/ 2502 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2503 send_rank[nsends] = i; nsends++; 2504 recv_rank[nrecvs++] = i; 2505 } 2506 } 2507 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2508 i = size-nleftover-1; 2509 j = 0; 2510 while (j < nsubcomm - nleftover){ 2511 send_rank[nsends++] = i; 2512 i--; j++; 2513 } 2514 } 2515 2516 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2517 for (i=0; i<nleftover; i++){ 2518 recv_rank[nrecvs++] = size-nleftover+i; 2519 } 2520 } 2521 2522 /* allocate sbuf_j, sbuf_a */ 2523 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2524 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2525 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2526 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2527 2528 /* copy mat's local entries into the buffers */ 2529 if (reuse == MAT_INITIAL_MATRIX){ 2530 rownz_max = 0; 2531 rptr = sbuf_j; 2532 cols = sbuf_j + rend-rstart + 1; 2533 vals = sbuf_a; 2534 rptr[0] = 0; 2535 for (i=0; i<rend-rstart; i++){ 2536 row = i + rstart; 2537 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2538 ncols = nzA + nzB; 2539 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2540 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2541 /* load the column indices for this row into cols */ 2542 lwrite = 0; 2543 for (l=0; l<nzB; l++) { 2544 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2545 vals[lwrite] = aworkB[l]; 2546 cols[lwrite++] = ctmp; 2547 } 2548 } 2549 for (l=0; l<nzA; l++){ 2550 vals[lwrite] = aworkA[l]; 2551 cols[lwrite++] = cstart + cworkA[l]; 2552 } 2553 for (l=0; l<nzB; l++) { 2554 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2555 vals[lwrite] = aworkB[l]; 2556 cols[lwrite++] = ctmp; 2557 } 2558 } 2559 vals += ncols; 2560 cols += ncols; 2561 rptr[i+1] = rptr[i] + ncols; 2562 if (rownz_max < ncols) rownz_max = ncols; 2563 } 2564 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2565 } else { /* only copy matrix values into sbuf_a */ 2566 rptr = sbuf_j; 2567 vals = sbuf_a; 2568 rptr[0] = 0; 2569 for (i=0; i<rend-rstart; i++){ 2570 row = i + rstart; 2571 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2572 ncols = nzA + nzB; 2573 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2574 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2575 lwrite = 0; 2576 for (l=0; l<nzB; l++) { 2577 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2578 } 2579 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2580 for (l=0; l<nzB; l++) { 2581 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2582 } 2583 vals += ncols; 2584 rptr[i+1] = rptr[i] + ncols; 2585 } 2586 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2587 2588 /* send nzlocal to others, and recv other's nzlocal */ 2589 /*--------------------------------------------------*/ 2590 if (reuse == MAT_INITIAL_MATRIX){ 2591 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2592 s_waits2 = s_waits3 + nsends; 2593 s_waits1 = s_waits2 + nsends; 2594 r_waits1 = s_waits1 + nsends; 2595 r_waits2 = r_waits1 + nrecvs; 2596 r_waits3 = r_waits2 + nrecvs; 2597 } else { 2598 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2599 r_waits3 = s_waits3 + nsends; 2600 } 2601 2602 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2603 if (reuse == MAT_INITIAL_MATRIX){ 2604 /* get new tags to keep the communication clean */ 2605 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2606 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2607 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2608 2609 /* post receives of other's nzlocal */ 2610 for (i=0; i<nrecvs; i++){ 2611 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2612 } 2613 /* send nzlocal to others */ 2614 for (i=0; i<nsends; i++){ 2615 sbuf_nz[i] = nzlocal; 2616 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2617 } 2618 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2619 count = nrecvs; 2620 while (count) { 2621 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2622 recv_rank[imdex] = recv_status.MPI_SOURCE; 2623 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2624 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2625 2626 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2627 rbuf_nz[imdex] += i + 2; 2628 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2629 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2630 count--; 2631 } 2632 /* wait on sends of nzlocal */ 2633 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2634 /* send mat->i,j to others, and recv from other's */ 2635 /*------------------------------------------------*/ 2636 for (i=0; i<nsends; i++){ 2637 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2638 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2639 } 2640 /* wait on receives of mat->i,j */ 2641 /*------------------------------*/ 2642 count = nrecvs; 2643 while (count) { 2644 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2645 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2646 count--; 2647 } 2648 /* wait on sends of mat->i,j */ 2649 /*---------------------------*/ 2650 if (nsends) { 2651 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2652 } 2653 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2654 2655 /* post receives, send and receive mat->a */ 2656 /*----------------------------------------*/ 2657 for (imdex=0; imdex<nrecvs; imdex++) { 2658 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2659 } 2660 for (i=0; i<nsends; i++){ 2661 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2662 } 2663 count = nrecvs; 2664 while (count) { 2665 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2666 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2667 count--; 2668 } 2669 if (nsends) { 2670 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2671 } 2672 2673 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2674 2675 /* create redundant matrix */ 2676 /*-------------------------*/ 2677 if (reuse == MAT_INITIAL_MATRIX){ 2678 /* compute rownz_max for preallocation */ 2679 for (imdex=0; imdex<nrecvs; imdex++){ 2680 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2681 rptr = rbuf_j[imdex]; 2682 for (i=0; i<j; i++){ 2683 ncols = rptr[i+1] - rptr[i]; 2684 if (rownz_max < ncols) rownz_max = ncols; 2685 } 2686 } 2687 2688 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2689 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2690 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2691 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2692 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2693 } else { 2694 C = *matredundant; 2695 } 2696 2697 /* insert local matrix entries */ 2698 rptr = sbuf_j; 2699 cols = sbuf_j + rend-rstart + 1; 2700 vals = sbuf_a; 2701 for (i=0; i<rend-rstart; i++){ 2702 row = i + rstart; 2703 ncols = rptr[i+1] - rptr[i]; 2704 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2705 vals += ncols; 2706 cols += ncols; 2707 } 2708 /* insert received matrix entries */ 2709 for (imdex=0; imdex<nrecvs; imdex++){ 2710 rstart = rowrange[recv_rank[imdex]]; 2711 rend = rowrange[recv_rank[imdex]+1]; 2712 rptr = rbuf_j[imdex]; 2713 cols = rbuf_j[imdex] + rend-rstart + 1; 2714 vals = rbuf_a[imdex]; 2715 for (i=0; i<rend-rstart; i++){ 2716 row = i + rstart; 2717 ncols = rptr[i+1] - rptr[i]; 2718 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2719 vals += ncols; 2720 cols += ncols; 2721 } 2722 } 2723 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2724 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2725 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2726 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2727 if (reuse == MAT_INITIAL_MATRIX) { 2728 PetscContainer container; 2729 *matredundant = C; 2730 /* create a supporting struct and attach it to C for reuse */ 2731 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2732 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2733 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2734 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2735 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2736 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2737 2738 redund->nzlocal = nzlocal; 2739 redund->nsends = nsends; 2740 redund->nrecvs = nrecvs; 2741 redund->send_rank = send_rank; 2742 redund->recv_rank = recv_rank; 2743 redund->sbuf_nz = sbuf_nz; 2744 redund->rbuf_nz = rbuf_nz; 2745 redund->sbuf_j = sbuf_j; 2746 redund->sbuf_a = sbuf_a; 2747 redund->rbuf_j = rbuf_j; 2748 redund->rbuf_a = rbuf_a; 2749 2750 redund->Destroy = C->ops->destroy; 2751 C->ops->destroy = MatDestroy_MatRedundant; 2752 } 2753 PetscFunctionReturn(0); 2754 } 2755 2756 #undef __FUNCT__ 2757 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2758 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2759 { 2760 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2761 PetscErrorCode ierr; 2762 PetscInt i,*idxb = 0; 2763 PetscScalar *va,*vb; 2764 Vec vtmp; 2765 2766 PetscFunctionBegin; 2767 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2768 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2769 if (idx) { 2770 for (i=0; i<A->rmap->n; i++) { 2771 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2772 } 2773 } 2774 2775 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2776 if (idx) { 2777 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2778 } 2779 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2780 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2781 2782 for (i=0; i<A->rmap->n; i++){ 2783 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2784 va[i] = vb[i]; 2785 if (idx) idx[i] = a->garray[idxb[i]]; 2786 } 2787 } 2788 2789 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2790 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2791 ierr = PetscFree(idxb);CHKERRQ(ierr); 2792 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2793 PetscFunctionReturn(0); 2794 } 2795 2796 #undef __FUNCT__ 2797 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2798 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2799 { 2800 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2801 PetscErrorCode ierr; 2802 PetscInt i,*idxb = 0; 2803 PetscScalar *va,*vb; 2804 Vec vtmp; 2805 2806 PetscFunctionBegin; 2807 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2808 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2809 if (idx) { 2810 for (i=0; i<A->cmap->n; i++) { 2811 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2812 } 2813 } 2814 2815 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2816 if (idx) { 2817 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2818 } 2819 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2820 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2821 2822 for (i=0; i<A->rmap->n; i++){ 2823 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2824 va[i] = vb[i]; 2825 if (idx) idx[i] = a->garray[idxb[i]]; 2826 } 2827 } 2828 2829 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2830 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2831 ierr = PetscFree(idxb);CHKERRQ(ierr); 2832 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2833 PetscFunctionReturn(0); 2834 } 2835 2836 #undef __FUNCT__ 2837 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2838 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2839 { 2840 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2841 PetscInt n = A->rmap->n; 2842 PetscInt cstart = A->cmap->rstart; 2843 PetscInt *cmap = mat->garray; 2844 PetscInt *diagIdx, *offdiagIdx; 2845 Vec diagV, offdiagV; 2846 PetscScalar *a, *diagA, *offdiagA; 2847 PetscInt r; 2848 PetscErrorCode ierr; 2849 2850 PetscFunctionBegin; 2851 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2852 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2853 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2854 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2855 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2856 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2857 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2858 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2859 for(r = 0; r < n; ++r) { 2860 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2861 a[r] = diagA[r]; 2862 idx[r] = cstart + diagIdx[r]; 2863 } else { 2864 a[r] = offdiagA[r]; 2865 idx[r] = cmap[offdiagIdx[r]]; 2866 } 2867 } 2868 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2869 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2870 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2871 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2872 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2873 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2874 PetscFunctionReturn(0); 2875 } 2876 2877 #undef __FUNCT__ 2878 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2879 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2880 { 2881 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2882 PetscInt n = A->rmap->n; 2883 PetscInt cstart = A->cmap->rstart; 2884 PetscInt *cmap = mat->garray; 2885 PetscInt *diagIdx, *offdiagIdx; 2886 Vec diagV, offdiagV; 2887 PetscScalar *a, *diagA, *offdiagA; 2888 PetscInt r; 2889 PetscErrorCode ierr; 2890 2891 PetscFunctionBegin; 2892 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2893 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2894 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2895 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2896 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2897 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2898 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2899 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2900 for(r = 0; r < n; ++r) { 2901 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2902 a[r] = diagA[r]; 2903 idx[r] = cstart + diagIdx[r]; 2904 } else { 2905 a[r] = offdiagA[r]; 2906 idx[r] = cmap[offdiagIdx[r]]; 2907 } 2908 } 2909 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2910 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2911 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2912 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2913 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2914 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2915 PetscFunctionReturn(0); 2916 } 2917 2918 #undef __FUNCT__ 2919 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2920 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2921 { 2922 PetscErrorCode ierr; 2923 Mat *dummy; 2924 2925 PetscFunctionBegin; 2926 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2927 *newmat = *dummy; 2928 ierr = PetscFree(dummy);CHKERRQ(ierr); 2929 PetscFunctionReturn(0); 2930 } 2931 2932 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2933 /* -------------------------------------------------------------------*/ 2934 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2935 MatGetRow_MPIAIJ, 2936 MatRestoreRow_MPIAIJ, 2937 MatMult_MPIAIJ, 2938 /* 4*/ MatMultAdd_MPIAIJ, 2939 MatMultTranspose_MPIAIJ, 2940 MatMultTransposeAdd_MPIAIJ, 2941 #ifdef PETSC_HAVE_PBGL 2942 MatSolve_MPIAIJ, 2943 #else 2944 0, 2945 #endif 2946 0, 2947 0, 2948 /*10*/ 0, 2949 0, 2950 0, 2951 MatSOR_MPIAIJ, 2952 MatTranspose_MPIAIJ, 2953 /*15*/ MatGetInfo_MPIAIJ, 2954 MatEqual_MPIAIJ, 2955 MatGetDiagonal_MPIAIJ, 2956 MatDiagonalScale_MPIAIJ, 2957 MatNorm_MPIAIJ, 2958 /*20*/ MatAssemblyBegin_MPIAIJ, 2959 MatAssemblyEnd_MPIAIJ, 2960 MatSetOption_MPIAIJ, 2961 MatZeroEntries_MPIAIJ, 2962 /*24*/ MatZeroRows_MPIAIJ, 2963 0, 2964 #ifdef PETSC_HAVE_PBGL 2965 0, 2966 #else 2967 0, 2968 #endif 2969 0, 2970 0, 2971 /*29*/ MatSetUpPreallocation_MPIAIJ, 2972 #ifdef PETSC_HAVE_PBGL 2973 0, 2974 #else 2975 0, 2976 #endif 2977 0, 2978 0, 2979 0, 2980 /*34*/ MatDuplicate_MPIAIJ, 2981 0, 2982 0, 2983 0, 2984 0, 2985 /*39*/ MatAXPY_MPIAIJ, 2986 MatGetSubMatrices_MPIAIJ, 2987 MatIncreaseOverlap_MPIAIJ, 2988 MatGetValues_MPIAIJ, 2989 MatCopy_MPIAIJ, 2990 /*44*/ MatGetRowMax_MPIAIJ, 2991 MatScale_MPIAIJ, 2992 0, 2993 0, 2994 MatZeroRowsColumns_MPIAIJ, 2995 /*49*/ MatSetBlockSize_MPIAIJ, 2996 0, 2997 0, 2998 0, 2999 0, 3000 /*54*/ MatFDColoringCreate_MPIAIJ, 3001 0, 3002 MatSetUnfactored_MPIAIJ, 3003 MatPermute_MPIAIJ, 3004 0, 3005 /*59*/ MatGetSubMatrix_MPIAIJ, 3006 MatDestroy_MPIAIJ, 3007 MatView_MPIAIJ, 3008 0, 3009 0, 3010 /*64*/ 0, 3011 0, 3012 0, 3013 0, 3014 0, 3015 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3016 MatGetRowMinAbs_MPIAIJ, 3017 0, 3018 MatSetColoring_MPIAIJ, 3019 #if defined(PETSC_HAVE_ADIC) 3020 MatSetValuesAdic_MPIAIJ, 3021 #else 3022 0, 3023 #endif 3024 MatSetValuesAdifor_MPIAIJ, 3025 /*75*/ MatFDColoringApply_AIJ, 3026 0, 3027 0, 3028 0, 3029 0, 3030 /*80*/ 0, 3031 0, 3032 0, 3033 /*83*/ MatLoad_MPIAIJ, 3034 0, 3035 0, 3036 0, 3037 0, 3038 0, 3039 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3040 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3041 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3042 MatPtAP_Basic, 3043 MatPtAPSymbolic_MPIAIJ, 3044 /*94*/ MatPtAPNumeric_MPIAIJ, 3045 0, 3046 0, 3047 0, 3048 0, 3049 /*99*/ 0, 3050 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3051 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3052 MatConjugate_MPIAIJ, 3053 0, 3054 /*104*/MatSetValuesRow_MPIAIJ, 3055 MatRealPart_MPIAIJ, 3056 MatImaginaryPart_MPIAIJ, 3057 0, 3058 0, 3059 /*109*/0, 3060 MatGetRedundantMatrix_MPIAIJ, 3061 MatGetRowMin_MPIAIJ, 3062 0, 3063 0, 3064 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 3065 0, 3066 0, 3067 0, 3068 0, 3069 /*119*/0, 3070 0, 3071 0, 3072 0, 3073 MatGetMultiProcBlock_MPIAIJ, 3074 /*124*/MatFindNonZeroRows_MPIAIJ, 3075 MatGetColumnNorms_MPIAIJ, 3076 0, 3077 0, 3078 MatGetSubMatricesParallel_MPIAIJ 3079 }; 3080 3081 /* ----------------------------------------------------------------------------------------*/ 3082 3083 EXTERN_C_BEGIN 3084 #undef __FUNCT__ 3085 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3086 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3087 { 3088 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3089 PetscErrorCode ierr; 3090 3091 PetscFunctionBegin; 3092 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3093 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3094 PetscFunctionReturn(0); 3095 } 3096 EXTERN_C_END 3097 3098 EXTERN_C_BEGIN 3099 #undef __FUNCT__ 3100 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3101 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3102 { 3103 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3104 PetscErrorCode ierr; 3105 3106 PetscFunctionBegin; 3107 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3108 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3109 PetscFunctionReturn(0); 3110 } 3111 EXTERN_C_END 3112 3113 EXTERN_C_BEGIN 3114 #undef __FUNCT__ 3115 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3116 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3117 { 3118 Mat_MPIAIJ *b; 3119 PetscErrorCode ierr; 3120 PetscInt i; 3121 3122 PetscFunctionBegin; 3123 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3124 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3125 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3126 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3127 3128 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3129 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3130 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3131 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3132 if (d_nnz) { 3133 for (i=0; i<B->rmap->n; i++) { 3134 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3135 } 3136 } 3137 if (o_nnz) { 3138 for (i=0; i<B->rmap->n; i++) { 3139 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3140 } 3141 } 3142 b = (Mat_MPIAIJ*)B->data; 3143 3144 if (!B->preallocated) { 3145 /* Explicitly create 2 MATSEQAIJ matrices. */ 3146 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3147 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3148 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3149 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3150 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3151 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3152 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3153 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3154 } 3155 3156 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3157 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3158 B->preallocated = PETSC_TRUE; 3159 PetscFunctionReturn(0); 3160 } 3161 EXTERN_C_END 3162 3163 #undef __FUNCT__ 3164 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3165 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3166 { 3167 Mat mat; 3168 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3169 PetscErrorCode ierr; 3170 3171 PetscFunctionBegin; 3172 *newmat = 0; 3173 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3174 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3175 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3176 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3177 a = (Mat_MPIAIJ*)mat->data; 3178 3179 mat->factortype = matin->factortype; 3180 mat->rmap->bs = matin->rmap->bs; 3181 mat->assembled = PETSC_TRUE; 3182 mat->insertmode = NOT_SET_VALUES; 3183 mat->preallocated = PETSC_TRUE; 3184 3185 a->size = oldmat->size; 3186 a->rank = oldmat->rank; 3187 a->donotstash = oldmat->donotstash; 3188 a->roworiented = oldmat->roworiented; 3189 a->rowindices = 0; 3190 a->rowvalues = 0; 3191 a->getrowactive = PETSC_FALSE; 3192 3193 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3194 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3195 3196 if (oldmat->colmap) { 3197 #if defined (PETSC_USE_CTABLE) 3198 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3199 #else 3200 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3201 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3202 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3203 #endif 3204 } else a->colmap = 0; 3205 if (oldmat->garray) { 3206 PetscInt len; 3207 len = oldmat->B->cmap->n; 3208 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3209 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3210 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3211 } else a->garray = 0; 3212 3213 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3214 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3215 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3216 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3217 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3218 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3219 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3220 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3221 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3222 *newmat = mat; 3223 PetscFunctionReturn(0); 3224 } 3225 3226 /* 3227 Allows sending/receiving larger messages then 2 gigabytes in a single call 3228 */ 3229 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm) 3230 { 3231 int ierr; 3232 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 3233 PetscInt i,numchunks; 3234 PetscMPIInt icnt; 3235 3236 numchunks = cnt/CHUNKSIZE + 1; 3237 for (i=0; i<numchunks; i++) { 3238 icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 3239 ierr = MPI_Send(mess,icnt,type,to,tag,comm);if (ierr) return ierr; 3240 if (type == MPIU_INT) { 3241 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 3242 } else if (type == MPIU_SCALAR) { 3243 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 3244 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 3245 } 3246 return 0; 3247 } 3248 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm) 3249 { 3250 int ierr; 3251 static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */ 3252 MPI_Status status; 3253 PetscInt i,numchunks; 3254 PetscMPIInt icnt; 3255 3256 numchunks = cnt/CHUNKSIZE + 1; 3257 for (i=0; i<numchunks; i++) { 3258 icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE); 3259 ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);if (ierr) return ierr; 3260 if (type == MPIU_INT) { 3261 mess = (void*) (((PetscInt*)mess) + CHUNKSIZE); 3262 } else if (type == MPIU_SCALAR) { 3263 mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE); 3264 } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype"); 3265 } 3266 return 0; 3267 } 3268 3269 #undef __FUNCT__ 3270 #define __FUNCT__ "MatLoad_MPIAIJ" 3271 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3272 { 3273 PetscScalar *vals,*svals; 3274 MPI_Comm comm = ((PetscObject)viewer)->comm; 3275 PetscErrorCode ierr; 3276 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3277 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3278 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3279 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3280 PetscInt cend,cstart,n,*rowners,sizesset=1; 3281 int fd; 3282 3283 PetscFunctionBegin; 3284 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3285 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3286 if (!rank) { 3287 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3288 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3289 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3290 } 3291 3292 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3293 3294 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3295 M = header[1]; N = header[2]; 3296 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3297 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3298 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3299 3300 /* If global sizes are set, check if they are consistent with that given in the file */ 3301 if (sizesset) { 3302 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3303 } 3304 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3305 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3306 3307 /* determine ownership of all rows */ 3308 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3309 else m = newMat->rmap->n; /* Set by user */ 3310 3311 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3312 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3313 3314 /* First process needs enough room for process with most rows */ 3315 if (!rank) { 3316 mmax = rowners[1]; 3317 for (i=2; i<size; i++) { 3318 mmax = PetscMax(mmax,rowners[i]); 3319 } 3320 } else mmax = m; 3321 3322 rowners[0] = 0; 3323 for (i=2; i<=size; i++) { 3324 rowners[i] += rowners[i-1]; 3325 } 3326 rstart = rowners[rank]; 3327 rend = rowners[rank+1]; 3328 3329 /* distribute row lengths to all processors */ 3330 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3331 if (!rank) { 3332 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3333 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3334 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3335 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3336 for (j=0; j<m; j++) { 3337 procsnz[0] += ourlens[j]; 3338 } 3339 for (i=1; i<size; i++) { 3340 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3341 /* calculate the number of nonzeros on each processor */ 3342 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3343 procsnz[i] += rowlengths[j]; 3344 } 3345 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3346 } 3347 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3348 } else { 3349 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3350 } 3351 3352 if (!rank) { 3353 /* determine max buffer needed and allocate it */ 3354 maxnz = 0; 3355 for (i=0; i<size; i++) { 3356 maxnz = PetscMax(maxnz,procsnz[i]); 3357 } 3358 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3359 3360 /* read in my part of the matrix column indices */ 3361 nz = procsnz[0]; 3362 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3363 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3364 3365 /* read in every one elses and ship off */ 3366 for (i=1; i<size; i++) { 3367 nz = procsnz[i]; 3368 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3369 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3370 } 3371 ierr = PetscFree(cols);CHKERRQ(ierr); 3372 } else { 3373 /* determine buffer space needed for message */ 3374 nz = 0; 3375 for (i=0; i<m; i++) { 3376 nz += ourlens[i]; 3377 } 3378 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3379 3380 /* receive message of column indices*/ 3381 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3382 } 3383 3384 /* determine column ownership if matrix is not square */ 3385 if (N != M) { 3386 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3387 else n = newMat->cmap->n; 3388 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3389 cstart = cend - n; 3390 } else { 3391 cstart = rstart; 3392 cend = rend; 3393 n = cend - cstart; 3394 } 3395 3396 /* loop over local rows, determining number of off diagonal entries */ 3397 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3398 jj = 0; 3399 for (i=0; i<m; i++) { 3400 for (j=0; j<ourlens[i]; j++) { 3401 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3402 jj++; 3403 } 3404 } 3405 3406 for (i=0; i<m; i++) { 3407 ourlens[i] -= offlens[i]; 3408 } 3409 if (!sizesset) { 3410 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3411 } 3412 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3413 3414 for (i=0; i<m; i++) { 3415 ourlens[i] += offlens[i]; 3416 } 3417 3418 if (!rank) { 3419 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3420 3421 /* read in my part of the matrix numerical values */ 3422 nz = procsnz[0]; 3423 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3424 3425 /* insert into matrix */ 3426 jj = rstart; 3427 smycols = mycols; 3428 svals = vals; 3429 for (i=0; i<m; i++) { 3430 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3431 smycols += ourlens[i]; 3432 svals += ourlens[i]; 3433 jj++; 3434 } 3435 3436 /* read in other processors and ship out */ 3437 for (i=1; i<size; i++) { 3438 nz = procsnz[i]; 3439 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3440 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3441 } 3442 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3443 } else { 3444 /* receive numeric values */ 3445 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3446 3447 /* receive message of values*/ 3448 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3449 3450 /* insert into matrix */ 3451 jj = rstart; 3452 smycols = mycols; 3453 svals = vals; 3454 for (i=0; i<m; i++) { 3455 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3456 smycols += ourlens[i]; 3457 svals += ourlens[i]; 3458 jj++; 3459 } 3460 } 3461 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3462 ierr = PetscFree(vals);CHKERRQ(ierr); 3463 ierr = PetscFree(mycols);CHKERRQ(ierr); 3464 ierr = PetscFree(rowners);CHKERRQ(ierr); 3465 3466 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3467 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3468 PetscFunctionReturn(0); 3469 } 3470 3471 #undef __FUNCT__ 3472 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3473 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3474 { 3475 PetscErrorCode ierr; 3476 IS iscol_local; 3477 PetscInt csize; 3478 3479 PetscFunctionBegin; 3480 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3481 if (call == MAT_REUSE_MATRIX) { 3482 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3483 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3484 } else { 3485 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3486 } 3487 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3488 if (call == MAT_INITIAL_MATRIX) { 3489 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3490 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3491 } 3492 PetscFunctionReturn(0); 3493 } 3494 3495 #undef __FUNCT__ 3496 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3497 /* 3498 Not great since it makes two copies of the submatrix, first an SeqAIJ 3499 in local and then by concatenating the local matrices the end result. 3500 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3501 3502 Note: This requires a sequential iscol with all indices. 3503 */ 3504 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3505 { 3506 PetscErrorCode ierr; 3507 PetscMPIInt rank,size; 3508 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3509 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3510 Mat *local,M,Mreuse; 3511 MatScalar *vwork,*aa; 3512 MPI_Comm comm = ((PetscObject)mat)->comm; 3513 Mat_SeqAIJ *aij; 3514 3515 3516 PetscFunctionBegin; 3517 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3518 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3519 3520 if (call == MAT_REUSE_MATRIX) { 3521 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3522 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3523 local = &Mreuse; 3524 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3525 } else { 3526 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3527 Mreuse = *local; 3528 ierr = PetscFree(local);CHKERRQ(ierr); 3529 } 3530 3531 /* 3532 m - number of local rows 3533 n - number of columns (same on all processors) 3534 rstart - first row in new global matrix generated 3535 */ 3536 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3537 if (call == MAT_INITIAL_MATRIX) { 3538 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3539 ii = aij->i; 3540 jj = aij->j; 3541 3542 /* 3543 Determine the number of non-zeros in the diagonal and off-diagonal 3544 portions of the matrix in order to do correct preallocation 3545 */ 3546 3547 /* first get start and end of "diagonal" columns */ 3548 if (csize == PETSC_DECIDE) { 3549 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3550 if (mglobal == n) { /* square matrix */ 3551 nlocal = m; 3552 } else { 3553 nlocal = n/size + ((n % size) > rank); 3554 } 3555 } else { 3556 nlocal = csize; 3557 } 3558 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3559 rstart = rend - nlocal; 3560 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3561 3562 /* next, compute all the lengths */ 3563 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3564 olens = dlens + m; 3565 for (i=0; i<m; i++) { 3566 jend = ii[i+1] - ii[i]; 3567 olen = 0; 3568 dlen = 0; 3569 for (j=0; j<jend; j++) { 3570 if (*jj < rstart || *jj >= rend) olen++; 3571 else dlen++; 3572 jj++; 3573 } 3574 olens[i] = olen; 3575 dlens[i] = dlen; 3576 } 3577 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3578 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3579 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3580 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3581 ierr = PetscFree(dlens);CHKERRQ(ierr); 3582 } else { 3583 PetscInt ml,nl; 3584 3585 M = *newmat; 3586 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3587 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3588 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3589 /* 3590 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3591 rather than the slower MatSetValues(). 3592 */ 3593 M->was_assembled = PETSC_TRUE; 3594 M->assembled = PETSC_FALSE; 3595 } 3596 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3597 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3598 ii = aij->i; 3599 jj = aij->j; 3600 aa = aij->a; 3601 for (i=0; i<m; i++) { 3602 row = rstart + i; 3603 nz = ii[i+1] - ii[i]; 3604 cwork = jj; jj += nz; 3605 vwork = aa; aa += nz; 3606 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3607 } 3608 3609 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3610 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3611 *newmat = M; 3612 3613 /* save submatrix used in processor for next request */ 3614 if (call == MAT_INITIAL_MATRIX) { 3615 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3616 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3617 } 3618 3619 PetscFunctionReturn(0); 3620 } 3621 3622 EXTERN_C_BEGIN 3623 #undef __FUNCT__ 3624 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3625 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3626 { 3627 PetscInt m,cstart, cend,j,nnz,i,d; 3628 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3629 const PetscInt *JJ; 3630 PetscScalar *values; 3631 PetscErrorCode ierr; 3632 3633 PetscFunctionBegin; 3634 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3635 3636 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3637 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3638 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3639 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3640 m = B->rmap->n; 3641 cstart = B->cmap->rstart; 3642 cend = B->cmap->rend; 3643 rstart = B->rmap->rstart; 3644 3645 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3646 3647 #if defined(PETSC_USE_DEBUGGING) 3648 for (i=0; i<m; i++) { 3649 nnz = Ii[i+1]- Ii[i]; 3650 JJ = J + Ii[i]; 3651 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3652 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3653 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3654 } 3655 #endif 3656 3657 for (i=0; i<m; i++) { 3658 nnz = Ii[i+1]- Ii[i]; 3659 JJ = J + Ii[i]; 3660 nnz_max = PetscMax(nnz_max,nnz); 3661 d = 0; 3662 for (j=0; j<nnz; j++) { 3663 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3664 } 3665 d_nnz[i] = d; 3666 o_nnz[i] = nnz - d; 3667 } 3668 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3669 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3670 3671 if (v) values = (PetscScalar*)v; 3672 else { 3673 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3674 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3675 } 3676 3677 for (i=0; i<m; i++) { 3678 ii = i + rstart; 3679 nnz = Ii[i+1]- Ii[i]; 3680 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3681 } 3682 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3683 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3684 3685 if (!v) { 3686 ierr = PetscFree(values);CHKERRQ(ierr); 3687 } 3688 PetscFunctionReturn(0); 3689 } 3690 EXTERN_C_END 3691 3692 #undef __FUNCT__ 3693 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3694 /*@ 3695 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3696 (the default parallel PETSc format). 3697 3698 Collective on MPI_Comm 3699 3700 Input Parameters: 3701 + B - the matrix 3702 . i - the indices into j for the start of each local row (starts with zero) 3703 . j - the column indices for each local row (starts with zero) 3704 - v - optional values in the matrix 3705 3706 Level: developer 3707 3708 Notes: 3709 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3710 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3711 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3712 3713 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3714 3715 The format which is used for the sparse matrix input, is equivalent to a 3716 row-major ordering.. i.e for the following matrix, the input data expected is 3717 as shown: 3718 3719 1 0 0 3720 2 0 3 P0 3721 ------- 3722 4 5 6 P1 3723 3724 Process0 [P0]: rows_owned=[0,1] 3725 i = {0,1,3} [size = nrow+1 = 2+1] 3726 j = {0,0,2} [size = nz = 6] 3727 v = {1,2,3} [size = nz = 6] 3728 3729 Process1 [P1]: rows_owned=[2] 3730 i = {0,3} [size = nrow+1 = 1+1] 3731 j = {0,1,2} [size = nz = 6] 3732 v = {4,5,6} [size = nz = 6] 3733 3734 .keywords: matrix, aij, compressed row, sparse, parallel 3735 3736 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3737 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3738 @*/ 3739 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3740 { 3741 PetscErrorCode ierr; 3742 3743 PetscFunctionBegin; 3744 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3745 PetscFunctionReturn(0); 3746 } 3747 3748 #undef __FUNCT__ 3749 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3750 /*@C 3751 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3752 (the default parallel PETSc format). For good matrix assembly performance 3753 the user should preallocate the matrix storage by setting the parameters 3754 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3755 performance can be increased by more than a factor of 50. 3756 3757 Collective on MPI_Comm 3758 3759 Input Parameters: 3760 + A - the matrix 3761 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3762 (same value is used for all local rows) 3763 . d_nnz - array containing the number of nonzeros in the various rows of the 3764 DIAGONAL portion of the local submatrix (possibly different for each row) 3765 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3766 The size of this array is equal to the number of local rows, i.e 'm'. 3767 You must leave room for the diagonal entry even if it is zero. 3768 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3769 submatrix (same value is used for all local rows). 3770 - o_nnz - array containing the number of nonzeros in the various rows of the 3771 OFF-DIAGONAL portion of the local submatrix (possibly different for 3772 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3773 structure. The size of this array is equal to the number 3774 of local rows, i.e 'm'. 3775 3776 If the *_nnz parameter is given then the *_nz parameter is ignored 3777 3778 The AIJ format (also called the Yale sparse matrix format or 3779 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3780 storage. The stored row and column indices begin with zero. 3781 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3782 3783 The parallel matrix is partitioned such that the first m0 rows belong to 3784 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3785 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3786 3787 The DIAGONAL portion of the local submatrix of a processor can be defined 3788 as the submatrix which is obtained by extraction the part corresponding to 3789 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3790 first row that belongs to the processor, r2 is the last row belonging to 3791 the this processor, and c1-c2 is range of indices of the local part of a 3792 vector suitable for applying the matrix to. This is an mxn matrix. In the 3793 common case of a square matrix, the row and column ranges are the same and 3794 the DIAGONAL part is also square. The remaining portion of the local 3795 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3796 3797 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3798 3799 You can call MatGetInfo() to get information on how effective the preallocation was; 3800 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3801 You can also run with the option -info and look for messages with the string 3802 malloc in them to see if additional memory allocation was needed. 3803 3804 Example usage: 3805 3806 Consider the following 8x8 matrix with 34 non-zero values, that is 3807 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3808 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3809 as follows: 3810 3811 .vb 3812 1 2 0 | 0 3 0 | 0 4 3813 Proc0 0 5 6 | 7 0 0 | 8 0 3814 9 0 10 | 11 0 0 | 12 0 3815 ------------------------------------- 3816 13 0 14 | 15 16 17 | 0 0 3817 Proc1 0 18 0 | 19 20 21 | 0 0 3818 0 0 0 | 22 23 0 | 24 0 3819 ------------------------------------- 3820 Proc2 25 26 27 | 0 0 28 | 29 0 3821 30 0 0 | 31 32 33 | 0 34 3822 .ve 3823 3824 This can be represented as a collection of submatrices as: 3825 3826 .vb 3827 A B C 3828 D E F 3829 G H I 3830 .ve 3831 3832 Where the submatrices A,B,C are owned by proc0, D,E,F are 3833 owned by proc1, G,H,I are owned by proc2. 3834 3835 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3836 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3837 The 'M','N' parameters are 8,8, and have the same values on all procs. 3838 3839 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3840 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3841 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3842 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3843 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3844 matrix, ans [DF] as another SeqAIJ matrix. 3845 3846 When d_nz, o_nz parameters are specified, d_nz storage elements are 3847 allocated for every row of the local diagonal submatrix, and o_nz 3848 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3849 One way to choose d_nz and o_nz is to use the max nonzerors per local 3850 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3851 In this case, the values of d_nz,o_nz are: 3852 .vb 3853 proc0 : dnz = 2, o_nz = 2 3854 proc1 : dnz = 3, o_nz = 2 3855 proc2 : dnz = 1, o_nz = 4 3856 .ve 3857 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3858 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3859 for proc3. i.e we are using 12+15+10=37 storage locations to store 3860 34 values. 3861 3862 When d_nnz, o_nnz parameters are specified, the storage is specified 3863 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3864 In the above case the values for d_nnz,o_nnz are: 3865 .vb 3866 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3867 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3868 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3869 .ve 3870 Here the space allocated is sum of all the above values i.e 34, and 3871 hence pre-allocation is perfect. 3872 3873 Level: intermediate 3874 3875 .keywords: matrix, aij, compressed row, sparse, parallel 3876 3877 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 3878 MPIAIJ, MatGetInfo() 3879 @*/ 3880 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3881 { 3882 PetscErrorCode ierr; 3883 3884 PetscFunctionBegin; 3885 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3886 PetscFunctionReturn(0); 3887 } 3888 3889 #undef __FUNCT__ 3890 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3891 /*@ 3892 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3893 CSR format the local rows. 3894 3895 Collective on MPI_Comm 3896 3897 Input Parameters: 3898 + comm - MPI communicator 3899 . m - number of local rows (Cannot be PETSC_DECIDE) 3900 . n - This value should be the same as the local size used in creating the 3901 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3902 calculated if N is given) For square matrices n is almost always m. 3903 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3904 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3905 . i - row indices 3906 . j - column indices 3907 - a - matrix values 3908 3909 Output Parameter: 3910 . mat - the matrix 3911 3912 Level: intermediate 3913 3914 Notes: 3915 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3916 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3917 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3918 3919 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3920 3921 The format which is used for the sparse matrix input, is equivalent to a 3922 row-major ordering.. i.e for the following matrix, the input data expected is 3923 as shown: 3924 3925 1 0 0 3926 2 0 3 P0 3927 ------- 3928 4 5 6 P1 3929 3930 Process0 [P0]: rows_owned=[0,1] 3931 i = {0,1,3} [size = nrow+1 = 2+1] 3932 j = {0,0,2} [size = nz = 6] 3933 v = {1,2,3} [size = nz = 6] 3934 3935 Process1 [P1]: rows_owned=[2] 3936 i = {0,3} [size = nrow+1 = 1+1] 3937 j = {0,1,2} [size = nz = 6] 3938 v = {4,5,6} [size = nz = 6] 3939 3940 .keywords: matrix, aij, compressed row, sparse, parallel 3941 3942 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3943 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 3944 @*/ 3945 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3946 { 3947 PetscErrorCode ierr; 3948 3949 PetscFunctionBegin; 3950 if (i[0]) { 3951 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3952 } 3953 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3954 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3955 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3956 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3957 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3958 PetscFunctionReturn(0); 3959 } 3960 3961 #undef __FUNCT__ 3962 #define __FUNCT__ "MatCreateMPIAIJ" 3963 /*@C 3964 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 3965 (the default parallel PETSc format). For good matrix assembly performance 3966 the user should preallocate the matrix storage by setting the parameters 3967 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3968 performance can be increased by more than a factor of 50. 3969 3970 Collective on MPI_Comm 3971 3972 Input Parameters: 3973 + comm - MPI communicator 3974 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3975 This value should be the same as the local size used in creating the 3976 y vector for the matrix-vector product y = Ax. 3977 . n - This value should be the same as the local size used in creating the 3978 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3979 calculated if N is given) For square matrices n is almost always m. 3980 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3981 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3982 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3983 (same value is used for all local rows) 3984 . d_nnz - array containing the number of nonzeros in the various rows of the 3985 DIAGONAL portion of the local submatrix (possibly different for each row) 3986 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3987 The size of this array is equal to the number of local rows, i.e 'm'. 3988 You must leave room for the diagonal entry even if it is zero. 3989 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3990 submatrix (same value is used for all local rows). 3991 - o_nnz - array containing the number of nonzeros in the various rows of the 3992 OFF-DIAGONAL portion of the local submatrix (possibly different for 3993 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3994 structure. The size of this array is equal to the number 3995 of local rows, i.e 'm'. 3996 3997 Output Parameter: 3998 . A - the matrix 3999 4000 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4001 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4002 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4003 4004 Notes: 4005 If the *_nnz parameter is given then the *_nz parameter is ignored 4006 4007 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4008 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4009 storage requirements for this matrix. 4010 4011 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4012 processor than it must be used on all processors that share the object for 4013 that argument. 4014 4015 The user MUST specify either the local or global matrix dimensions 4016 (possibly both). 4017 4018 The parallel matrix is partitioned across processors such that the 4019 first m0 rows belong to process 0, the next m1 rows belong to 4020 process 1, the next m2 rows belong to process 2 etc.. where 4021 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4022 values corresponding to [m x N] submatrix. 4023 4024 The columns are logically partitioned with the n0 columns belonging 4025 to 0th partition, the next n1 columns belonging to the next 4026 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4027 4028 The DIAGONAL portion of the local submatrix on any given processor 4029 is the submatrix corresponding to the rows and columns m,n 4030 corresponding to the given processor. i.e diagonal matrix on 4031 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4032 etc. The remaining portion of the local submatrix [m x (N-n)] 4033 constitute the OFF-DIAGONAL portion. The example below better 4034 illustrates this concept. 4035 4036 For a square global matrix we define each processor's diagonal portion 4037 to be its local rows and the corresponding columns (a square submatrix); 4038 each processor's off-diagonal portion encompasses the remainder of the 4039 local matrix (a rectangular submatrix). 4040 4041 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4042 4043 When calling this routine with a single process communicator, a matrix of 4044 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4045 type of communicator, use the construction mechanism: 4046 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4047 4048 By default, this format uses inodes (identical nodes) when possible. 4049 We search for consecutive rows with the same nonzero structure, thereby 4050 reusing matrix information to achieve increased efficiency. 4051 4052 Options Database Keys: 4053 + -mat_no_inode - Do not use inodes 4054 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4055 - -mat_aij_oneindex - Internally use indexing starting at 1 4056 rather than 0. Note that when calling MatSetValues(), 4057 the user still MUST index entries starting at 0! 4058 4059 4060 Example usage: 4061 4062 Consider the following 8x8 matrix with 34 non-zero values, that is 4063 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4064 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4065 as follows: 4066 4067 .vb 4068 1 2 0 | 0 3 0 | 0 4 4069 Proc0 0 5 6 | 7 0 0 | 8 0 4070 9 0 10 | 11 0 0 | 12 0 4071 ------------------------------------- 4072 13 0 14 | 15 16 17 | 0 0 4073 Proc1 0 18 0 | 19 20 21 | 0 0 4074 0 0 0 | 22 23 0 | 24 0 4075 ------------------------------------- 4076 Proc2 25 26 27 | 0 0 28 | 29 0 4077 30 0 0 | 31 32 33 | 0 34 4078 .ve 4079 4080 This can be represented as a collection of submatrices as: 4081 4082 .vb 4083 A B C 4084 D E F 4085 G H I 4086 .ve 4087 4088 Where the submatrices A,B,C are owned by proc0, D,E,F are 4089 owned by proc1, G,H,I are owned by proc2. 4090 4091 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4092 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4093 The 'M','N' parameters are 8,8, and have the same values on all procs. 4094 4095 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4096 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4097 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4098 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4099 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4100 matrix, ans [DF] as another SeqAIJ matrix. 4101 4102 When d_nz, o_nz parameters are specified, d_nz storage elements are 4103 allocated for every row of the local diagonal submatrix, and o_nz 4104 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4105 One way to choose d_nz and o_nz is to use the max nonzerors per local 4106 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4107 In this case, the values of d_nz,o_nz are: 4108 .vb 4109 proc0 : dnz = 2, o_nz = 2 4110 proc1 : dnz = 3, o_nz = 2 4111 proc2 : dnz = 1, o_nz = 4 4112 .ve 4113 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4114 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4115 for proc3. i.e we are using 12+15+10=37 storage locations to store 4116 34 values. 4117 4118 When d_nnz, o_nnz parameters are specified, the storage is specified 4119 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4120 In the above case the values for d_nnz,o_nnz are: 4121 .vb 4122 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4123 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4124 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4125 .ve 4126 Here the space allocated is sum of all the above values i.e 34, and 4127 hence pre-allocation is perfect. 4128 4129 Level: intermediate 4130 4131 .keywords: matrix, aij, compressed row, sparse, parallel 4132 4133 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4134 MPIAIJ, MatCreateMPIAIJWithArrays() 4135 @*/ 4136 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4137 { 4138 PetscErrorCode ierr; 4139 PetscMPIInt size; 4140 4141 PetscFunctionBegin; 4142 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4143 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4144 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4145 if (size > 1) { 4146 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4147 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4148 } else { 4149 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4150 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4151 } 4152 PetscFunctionReturn(0); 4153 } 4154 4155 #undef __FUNCT__ 4156 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4157 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4158 { 4159 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4160 4161 PetscFunctionBegin; 4162 *Ad = a->A; 4163 *Ao = a->B; 4164 *colmap = a->garray; 4165 PetscFunctionReturn(0); 4166 } 4167 4168 #undef __FUNCT__ 4169 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4170 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4171 { 4172 PetscErrorCode ierr; 4173 PetscInt i; 4174 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4175 4176 PetscFunctionBegin; 4177 if (coloring->ctype == IS_COLORING_GLOBAL) { 4178 ISColoringValue *allcolors,*colors; 4179 ISColoring ocoloring; 4180 4181 /* set coloring for diagonal portion */ 4182 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4183 4184 /* set coloring for off-diagonal portion */ 4185 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4186 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4187 for (i=0; i<a->B->cmap->n; i++) { 4188 colors[i] = allcolors[a->garray[i]]; 4189 } 4190 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4191 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4192 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4193 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4194 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4195 ISColoringValue *colors; 4196 PetscInt *larray; 4197 ISColoring ocoloring; 4198 4199 /* set coloring for diagonal portion */ 4200 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4201 for (i=0; i<a->A->cmap->n; i++) { 4202 larray[i] = i + A->cmap->rstart; 4203 } 4204 ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4205 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4206 for (i=0; i<a->A->cmap->n; i++) { 4207 colors[i] = coloring->colors[larray[i]]; 4208 } 4209 ierr = PetscFree(larray);CHKERRQ(ierr); 4210 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4211 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4212 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4213 4214 /* set coloring for off-diagonal portion */ 4215 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4216 ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4217 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4218 for (i=0; i<a->B->cmap->n; i++) { 4219 colors[i] = coloring->colors[larray[i]]; 4220 } 4221 ierr = PetscFree(larray);CHKERRQ(ierr); 4222 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4223 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4224 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4225 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4226 4227 PetscFunctionReturn(0); 4228 } 4229 4230 #if defined(PETSC_HAVE_ADIC) 4231 #undef __FUNCT__ 4232 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4233 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4234 { 4235 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4236 PetscErrorCode ierr; 4237 4238 PetscFunctionBegin; 4239 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4240 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4241 PetscFunctionReturn(0); 4242 } 4243 #endif 4244 4245 #undef __FUNCT__ 4246 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4247 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4248 { 4249 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4250 PetscErrorCode ierr; 4251 4252 PetscFunctionBegin; 4253 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4254 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4255 PetscFunctionReturn(0); 4256 } 4257 4258 #undef __FUNCT__ 4259 #define __FUNCT__ "MatMerge" 4260 /*@ 4261 MatMerge - Creates a single large PETSc matrix by concatinating sequential 4262 matrices from each processor 4263 4264 Collective on MPI_Comm 4265 4266 Input Parameters: 4267 + comm - the communicators the parallel matrix will live on 4268 . inmat - the input sequential matrices 4269 . n - number of local columns (or PETSC_DECIDE) 4270 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4271 4272 Output Parameter: 4273 . outmat - the parallel matrix generated 4274 4275 Level: advanced 4276 4277 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4278 4279 @*/ 4280 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4281 { 4282 PetscErrorCode ierr; 4283 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 4284 PetscInt *indx; 4285 PetscScalar *values; 4286 4287 PetscFunctionBegin; 4288 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4289 if (scall == MAT_INITIAL_MATRIX){ 4290 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 4291 if (n == PETSC_DECIDE){ 4292 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4293 } 4294 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4295 rstart -= m; 4296 4297 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4298 for (i=0;i<m;i++) { 4299 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4300 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4301 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4302 } 4303 /* This routine will ONLY return MPIAIJ type matrix */ 4304 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4305 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4306 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4307 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4308 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4309 4310 } else if (scall == MAT_REUSE_MATRIX){ 4311 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4312 } else { 4313 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4314 } 4315 4316 for (i=0;i<m;i++) { 4317 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4318 Ii = i + rstart; 4319 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4320 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4321 } 4322 ierr = MatDestroy(&inmat);CHKERRQ(ierr); 4323 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4324 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4325 4326 PetscFunctionReturn(0); 4327 } 4328 4329 #undef __FUNCT__ 4330 #define __FUNCT__ "MatFileSplit" 4331 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4332 { 4333 PetscErrorCode ierr; 4334 PetscMPIInt rank; 4335 PetscInt m,N,i,rstart,nnz; 4336 size_t len; 4337 const PetscInt *indx; 4338 PetscViewer out; 4339 char *name; 4340 Mat B; 4341 const PetscScalar *values; 4342 4343 PetscFunctionBegin; 4344 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4345 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4346 /* Should this be the type of the diagonal block of A? */ 4347 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4348 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4349 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4350 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4351 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4352 for (i=0;i<m;i++) { 4353 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4354 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4355 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4356 } 4357 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4358 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4359 4360 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4361 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4362 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4363 sprintf(name,"%s.%d",outfile,rank); 4364 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4365 ierr = PetscFree(name); 4366 ierr = MatView(B,out);CHKERRQ(ierr); 4367 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4368 ierr = MatDestroy(&B);CHKERRQ(ierr); 4369 PetscFunctionReturn(0); 4370 } 4371 4372 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4373 #undef __FUNCT__ 4374 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4375 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4376 { 4377 PetscErrorCode ierr; 4378 Mat_Merge_SeqsToMPI *merge; 4379 PetscContainer container; 4380 4381 PetscFunctionBegin; 4382 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4383 if (container) { 4384 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4385 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4386 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4387 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4388 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4389 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4390 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4391 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4392 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4393 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4394 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4395 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4396 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4397 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4398 ierr = PetscFree(merge);CHKERRQ(ierr); 4399 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4400 } 4401 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4402 PetscFunctionReturn(0); 4403 } 4404 4405 #include <../src/mat/utils/freespace.h> 4406 #include <petscbt.h> 4407 4408 #undef __FUNCT__ 4409 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4410 /*@C 4411 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4412 matrices from each processor 4413 4414 Collective on MPI_Comm 4415 4416 Input Parameters: 4417 + comm - the communicators the parallel matrix will live on 4418 . seqmat - the input sequential matrices 4419 . m - number of local rows (or PETSC_DECIDE) 4420 . n - number of local columns (or PETSC_DECIDE) 4421 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4422 4423 Output Parameter: 4424 . mpimat - the parallel matrix generated 4425 4426 Level: advanced 4427 4428 Notes: 4429 The dimensions of the sequential matrix in each processor MUST be the same. 4430 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4431 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4432 @*/ 4433 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4434 { 4435 PetscErrorCode ierr; 4436 MPI_Comm comm=((PetscObject)mpimat)->comm; 4437 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4438 PetscMPIInt size,rank,taga,*len_s; 4439 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4440 PetscInt proc,m; 4441 PetscInt **buf_ri,**buf_rj; 4442 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4443 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4444 MPI_Request *s_waits,*r_waits; 4445 MPI_Status *status; 4446 MatScalar *aa=a->a; 4447 MatScalar **abuf_r,*ba_i; 4448 Mat_Merge_SeqsToMPI *merge; 4449 PetscContainer container; 4450 4451 PetscFunctionBegin; 4452 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4453 4454 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4455 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4456 4457 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4458 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4459 4460 bi = merge->bi; 4461 bj = merge->bj; 4462 buf_ri = merge->buf_ri; 4463 buf_rj = merge->buf_rj; 4464 4465 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4466 owners = merge->rowmap->range; 4467 len_s = merge->len_s; 4468 4469 /* send and recv matrix values */ 4470 /*-----------------------------*/ 4471 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4472 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4473 4474 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4475 for (proc=0,k=0; proc<size; proc++){ 4476 if (!len_s[proc]) continue; 4477 i = owners[proc]; 4478 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4479 k++; 4480 } 4481 4482 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4483 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4484 ierr = PetscFree(status);CHKERRQ(ierr); 4485 4486 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4487 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4488 4489 /* insert mat values of mpimat */ 4490 /*----------------------------*/ 4491 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4492 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4493 4494 for (k=0; k<merge->nrecv; k++){ 4495 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4496 nrows = *(buf_ri_k[k]); 4497 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4498 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4499 } 4500 4501 /* set values of ba */ 4502 m = merge->rowmap->n; 4503 for (i=0; i<m; i++) { 4504 arow = owners[rank] + i; 4505 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4506 bnzi = bi[i+1] - bi[i]; 4507 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4508 4509 /* add local non-zero vals of this proc's seqmat into ba */ 4510 anzi = ai[arow+1] - ai[arow]; 4511 aj = a->j + ai[arow]; 4512 aa = a->a + ai[arow]; 4513 nextaj = 0; 4514 for (j=0; nextaj<anzi; j++){ 4515 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4516 ba_i[j] += aa[nextaj++]; 4517 } 4518 } 4519 4520 /* add received vals into ba */ 4521 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4522 /* i-th row */ 4523 if (i == *nextrow[k]) { 4524 anzi = *(nextai[k]+1) - *nextai[k]; 4525 aj = buf_rj[k] + *(nextai[k]); 4526 aa = abuf_r[k] + *(nextai[k]); 4527 nextaj = 0; 4528 for (j=0; nextaj<anzi; j++){ 4529 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4530 ba_i[j] += aa[nextaj++]; 4531 } 4532 } 4533 nextrow[k]++; nextai[k]++; 4534 } 4535 } 4536 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4537 } 4538 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4539 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4540 4541 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4542 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4543 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4544 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4545 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4546 PetscFunctionReturn(0); 4547 } 4548 4549 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4550 4551 #undef __FUNCT__ 4552 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4553 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4554 { 4555 PetscErrorCode ierr; 4556 Mat B_mpi; 4557 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4558 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4559 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4560 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4561 PetscInt len,proc,*dnz,*onz; 4562 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4563 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4564 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4565 MPI_Status *status; 4566 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4567 PetscBT lnkbt; 4568 Mat_Merge_SeqsToMPI *merge; 4569 PetscContainer container; 4570 4571 PetscFunctionBegin; 4572 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4573 4574 /* make sure it is a PETSc comm */ 4575 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4576 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4577 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4578 4579 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4580 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4581 4582 /* determine row ownership */ 4583 /*---------------------------------------------------------*/ 4584 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4585 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4586 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4587 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4588 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4589 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4590 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4591 4592 m = merge->rowmap->n; 4593 M = merge->rowmap->N; 4594 owners = merge->rowmap->range; 4595 4596 /* determine the number of messages to send, their lengths */ 4597 /*---------------------------------------------------------*/ 4598 len_s = merge->len_s; 4599 4600 len = 0; /* length of buf_si[] */ 4601 merge->nsend = 0; 4602 for (proc=0; proc<size; proc++){ 4603 len_si[proc] = 0; 4604 if (proc == rank){ 4605 len_s[proc] = 0; 4606 } else { 4607 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4608 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4609 } 4610 if (len_s[proc]) { 4611 merge->nsend++; 4612 nrows = 0; 4613 for (i=owners[proc]; i<owners[proc+1]; i++){ 4614 if (ai[i+1] > ai[i]) nrows++; 4615 } 4616 len_si[proc] = 2*(nrows+1); 4617 len += len_si[proc]; 4618 } 4619 } 4620 4621 /* determine the number and length of messages to receive for ij-structure */ 4622 /*-------------------------------------------------------------------------*/ 4623 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4624 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4625 4626 /* post the Irecv of j-structure */ 4627 /*-------------------------------*/ 4628 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4629 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4630 4631 /* post the Isend of j-structure */ 4632 /*--------------------------------*/ 4633 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4634 4635 for (proc=0, k=0; proc<size; proc++){ 4636 if (!len_s[proc]) continue; 4637 i = owners[proc]; 4638 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4639 k++; 4640 } 4641 4642 /* receives and sends of j-structure are complete */ 4643 /*------------------------------------------------*/ 4644 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4645 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4646 4647 /* send and recv i-structure */ 4648 /*---------------------------*/ 4649 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4650 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4651 4652 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4653 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4654 for (proc=0,k=0; proc<size; proc++){ 4655 if (!len_s[proc]) continue; 4656 /* form outgoing message for i-structure: 4657 buf_si[0]: nrows to be sent 4658 [1:nrows]: row index (global) 4659 [nrows+1:2*nrows+1]: i-structure index 4660 */ 4661 /*-------------------------------------------*/ 4662 nrows = len_si[proc]/2 - 1; 4663 buf_si_i = buf_si + nrows+1; 4664 buf_si[0] = nrows; 4665 buf_si_i[0] = 0; 4666 nrows = 0; 4667 for (i=owners[proc]; i<owners[proc+1]; i++){ 4668 anzi = ai[i+1] - ai[i]; 4669 if (anzi) { 4670 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4671 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4672 nrows++; 4673 } 4674 } 4675 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4676 k++; 4677 buf_si += len_si[proc]; 4678 } 4679 4680 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4681 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4682 4683 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4684 for (i=0; i<merge->nrecv; i++){ 4685 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4686 } 4687 4688 ierr = PetscFree(len_si);CHKERRQ(ierr); 4689 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4690 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4691 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4692 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4693 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4694 ierr = PetscFree(status);CHKERRQ(ierr); 4695 4696 /* compute a local seq matrix in each processor */ 4697 /*----------------------------------------------*/ 4698 /* allocate bi array and free space for accumulating nonzero column info */ 4699 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4700 bi[0] = 0; 4701 4702 /* create and initialize a linked list */ 4703 nlnk = N+1; 4704 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4705 4706 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4707 len = 0; 4708 len = ai[owners[rank+1]] - ai[owners[rank]]; 4709 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4710 current_space = free_space; 4711 4712 /* determine symbolic info for each local row */ 4713 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4714 4715 for (k=0; k<merge->nrecv; k++){ 4716 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4717 nrows = *buf_ri_k[k]; 4718 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4719 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4720 } 4721 4722 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4723 len = 0; 4724 for (i=0;i<m;i++) { 4725 bnzi = 0; 4726 /* add local non-zero cols of this proc's seqmat into lnk */ 4727 arow = owners[rank] + i; 4728 anzi = ai[arow+1] - ai[arow]; 4729 aj = a->j + ai[arow]; 4730 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4731 bnzi += nlnk; 4732 /* add received col data into lnk */ 4733 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4734 if (i == *nextrow[k]) { /* i-th row */ 4735 anzi = *(nextai[k]+1) - *nextai[k]; 4736 aj = buf_rj[k] + *nextai[k]; 4737 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4738 bnzi += nlnk; 4739 nextrow[k]++; nextai[k]++; 4740 } 4741 } 4742 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4743 4744 /* if free space is not available, make more free space */ 4745 if (current_space->local_remaining<bnzi) { 4746 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4747 nspacedouble++; 4748 } 4749 /* copy data into free space, then initialize lnk */ 4750 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4751 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4752 4753 current_space->array += bnzi; 4754 current_space->local_used += bnzi; 4755 current_space->local_remaining -= bnzi; 4756 4757 bi[i+1] = bi[i] + bnzi; 4758 } 4759 4760 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4761 4762 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4763 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4764 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4765 4766 /* create symbolic parallel matrix B_mpi */ 4767 /*---------------------------------------*/ 4768 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4769 if (n==PETSC_DECIDE) { 4770 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4771 } else { 4772 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4773 } 4774 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4775 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4776 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4777 4778 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4779 B_mpi->assembled = PETSC_FALSE; 4780 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4781 merge->bi = bi; 4782 merge->bj = bj; 4783 merge->buf_ri = buf_ri; 4784 merge->buf_rj = buf_rj; 4785 merge->coi = PETSC_NULL; 4786 merge->coj = PETSC_NULL; 4787 merge->owners_co = PETSC_NULL; 4788 4789 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4790 4791 /* attach the supporting struct to B_mpi for reuse */ 4792 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4793 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4794 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4795 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4796 *mpimat = B_mpi; 4797 4798 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4799 PetscFunctionReturn(0); 4800 } 4801 4802 #undef __FUNCT__ 4803 #define __FUNCT__ "MatMerge_SeqsToMPI" 4804 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4805 { 4806 PetscErrorCode ierr; 4807 4808 PetscFunctionBegin; 4809 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4810 if (scall == MAT_INITIAL_MATRIX){ 4811 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4812 } 4813 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4814 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4815 PetscFunctionReturn(0); 4816 } 4817 4818 #undef __FUNCT__ 4819 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4820 /*@ 4821 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4822 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4823 with MatGetSize() 4824 4825 Not Collective 4826 4827 Input Parameters: 4828 + A - the matrix 4829 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4830 4831 Output Parameter: 4832 . A_loc - the local sequential matrix generated 4833 4834 Level: developer 4835 4836 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4837 4838 @*/ 4839 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4840 { 4841 PetscErrorCode ierr; 4842 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4843 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4844 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4845 MatScalar *aa=a->a,*ba=b->a,*cam; 4846 PetscScalar *ca; 4847 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4848 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4849 PetscBool match; 4850 4851 PetscFunctionBegin; 4852 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4853 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4854 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4855 if (scall == MAT_INITIAL_MATRIX){ 4856 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4857 ci[0] = 0; 4858 for (i=0; i<am; i++){ 4859 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4860 } 4861 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4862 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4863 k = 0; 4864 for (i=0; i<am; i++) { 4865 ncols_o = bi[i+1] - bi[i]; 4866 ncols_d = ai[i+1] - ai[i]; 4867 /* off-diagonal portion of A */ 4868 for (jo=0; jo<ncols_o; jo++) { 4869 col = cmap[*bj]; 4870 if (col >= cstart) break; 4871 cj[k] = col; bj++; 4872 ca[k++] = *ba++; 4873 } 4874 /* diagonal portion of A */ 4875 for (j=0; j<ncols_d; j++) { 4876 cj[k] = cstart + *aj++; 4877 ca[k++] = *aa++; 4878 } 4879 /* off-diagonal portion of A */ 4880 for (j=jo; j<ncols_o; j++) { 4881 cj[k] = cmap[*bj++]; 4882 ca[k++] = *ba++; 4883 } 4884 } 4885 /* put together the new matrix */ 4886 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4887 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4888 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4889 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4890 mat->free_a = PETSC_TRUE; 4891 mat->free_ij = PETSC_TRUE; 4892 mat->nonew = 0; 4893 } else if (scall == MAT_REUSE_MATRIX){ 4894 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4895 ci = mat->i; cj = mat->j; cam = mat->a; 4896 for (i=0; i<am; i++) { 4897 /* off-diagonal portion of A */ 4898 ncols_o = bi[i+1] - bi[i]; 4899 for (jo=0; jo<ncols_o; jo++) { 4900 col = cmap[*bj]; 4901 if (col >= cstart) break; 4902 *cam++ = *ba++; bj++; 4903 } 4904 /* diagonal portion of A */ 4905 ncols_d = ai[i+1] - ai[i]; 4906 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4907 /* off-diagonal portion of A */ 4908 for (j=jo; j<ncols_o; j++) { 4909 *cam++ = *ba++; bj++; 4910 } 4911 } 4912 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4913 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4914 PetscFunctionReturn(0); 4915 } 4916 4917 #undef __FUNCT__ 4918 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4919 /*@C 4920 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4921 4922 Not Collective 4923 4924 Input Parameters: 4925 + A - the matrix 4926 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4927 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4928 4929 Output Parameter: 4930 . A_loc - the local sequential matrix generated 4931 4932 Level: developer 4933 4934 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4935 4936 @*/ 4937 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4938 { 4939 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4940 PetscErrorCode ierr; 4941 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4942 IS isrowa,iscola; 4943 Mat *aloc; 4944 PetscBool match; 4945 4946 PetscFunctionBegin; 4947 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4948 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4949 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4950 if (!row){ 4951 start = A->rmap->rstart; end = A->rmap->rend; 4952 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4953 } else { 4954 isrowa = *row; 4955 } 4956 if (!col){ 4957 start = A->cmap->rstart; 4958 cmap = a->garray; 4959 nzA = a->A->cmap->n; 4960 nzB = a->B->cmap->n; 4961 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4962 ncols = 0; 4963 for (i=0; i<nzB; i++) { 4964 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4965 else break; 4966 } 4967 imark = i; 4968 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4969 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4970 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4971 } else { 4972 iscola = *col; 4973 } 4974 if (scall != MAT_INITIAL_MATRIX){ 4975 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4976 aloc[0] = *A_loc; 4977 } 4978 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4979 *A_loc = aloc[0]; 4980 ierr = PetscFree(aloc);CHKERRQ(ierr); 4981 if (!row){ 4982 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4983 } 4984 if (!col){ 4985 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4986 } 4987 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4988 PetscFunctionReturn(0); 4989 } 4990 4991 #undef __FUNCT__ 4992 #define __FUNCT__ "MatGetBrowsOfAcols" 4993 /*@C 4994 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4995 4996 Collective on Mat 4997 4998 Input Parameters: 4999 + A,B - the matrices in mpiaij format 5000 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5001 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5002 5003 Output Parameter: 5004 + rowb, colb - index sets of rows and columns of B to extract 5005 . brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows 5006 - B_seq - the sequential matrix generated 5007 5008 Level: developer 5009 5010 @*/ 5011 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 5012 { 5013 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5014 PetscErrorCode ierr; 5015 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5016 IS isrowb,iscolb; 5017 Mat *bseq; 5018 5019 PetscFunctionBegin; 5020 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5021 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5022 } 5023 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5024 5025 if (scall == MAT_INITIAL_MATRIX){ 5026 start = A->cmap->rstart; 5027 cmap = a->garray; 5028 nzA = a->A->cmap->n; 5029 nzB = a->B->cmap->n; 5030 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5031 ncols = 0; 5032 for (i=0; i<nzB; i++) { /* row < local row index */ 5033 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5034 else break; 5035 } 5036 imark = i; 5037 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5038 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5039 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5040 *brstart = imark; 5041 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5042 } else { 5043 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5044 isrowb = *rowb; iscolb = *colb; 5045 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5046 bseq[0] = *B_seq; 5047 } 5048 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5049 *B_seq = bseq[0]; 5050 ierr = PetscFree(bseq);CHKERRQ(ierr); 5051 if (!rowb){ 5052 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5053 } else { 5054 *rowb = isrowb; 5055 } 5056 if (!colb){ 5057 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5058 } else { 5059 *colb = iscolb; 5060 } 5061 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5062 PetscFunctionReturn(0); 5063 } 5064 5065 #undef __FUNCT__ 5066 #define __FUNCT__ "MatGetBrowsOfAoCols" 5067 /*@C 5068 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5069 of the OFF-DIAGONAL portion of local A 5070 5071 Collective on Mat 5072 5073 Input Parameters: 5074 + A,B - the matrices in mpiaij format 5075 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5076 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5077 . startsj_r - similar to startsj for receives 5078 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5079 5080 Output Parameter: 5081 + B_oth - the sequential matrix generated 5082 5083 Level: developer 5084 5085 @*/ 5086 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5087 { 5088 VecScatter_MPI_General *gen_to,*gen_from; 5089 PetscErrorCode ierr; 5090 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5091 Mat_SeqAIJ *b_oth; 5092 VecScatter ctx=a->Mvctx; 5093 MPI_Comm comm=((PetscObject)ctx)->comm; 5094 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5095 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5096 PetscScalar *rvalues,*svalues; 5097 MatScalar *b_otha,*bufa,*bufA; 5098 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5099 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5100 MPI_Status *sstatus,rstatus; 5101 PetscMPIInt jj; 5102 PetscInt *cols,sbs,rbs; 5103 PetscScalar *vals; 5104 5105 PetscFunctionBegin; 5106 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5107 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5108 } 5109 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5110 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5111 5112 gen_to = (VecScatter_MPI_General*)ctx->todata; 5113 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5114 rvalues = gen_from->values; /* holds the length of receiving row */ 5115 svalues = gen_to->values; /* holds the length of sending row */ 5116 nrecvs = gen_from->n; 5117 nsends = gen_to->n; 5118 5119 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5120 srow = gen_to->indices; /* local row index to be sent */ 5121 sstarts = gen_to->starts; 5122 sprocs = gen_to->procs; 5123 sstatus = gen_to->sstatus; 5124 sbs = gen_to->bs; 5125 rstarts = gen_from->starts; 5126 rprocs = gen_from->procs; 5127 rbs = gen_from->bs; 5128 5129 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5130 if (scall == MAT_INITIAL_MATRIX){ 5131 /* i-array */ 5132 /*---------*/ 5133 /* post receives */ 5134 for (i=0; i<nrecvs; i++){ 5135 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5136 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5137 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5138 } 5139 5140 /* pack the outgoing message */ 5141 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5142 sstartsj[0] = 0; rstartsj[0] = 0; 5143 len = 0; /* total length of j or a array to be sent */ 5144 k = 0; 5145 for (i=0; i<nsends; i++){ 5146 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5147 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5148 for (j=0; j<nrows; j++) { 5149 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5150 for (l=0; l<sbs; l++){ 5151 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5152 rowlen[j*sbs+l] = ncols; 5153 len += ncols; 5154 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5155 } 5156 k++; 5157 } 5158 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5159 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5160 } 5161 /* recvs and sends of i-array are completed */ 5162 i = nrecvs; 5163 while (i--) { 5164 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5165 } 5166 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5167 5168 /* allocate buffers for sending j and a arrays */ 5169 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5170 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5171 5172 /* create i-array of B_oth */ 5173 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5174 b_othi[0] = 0; 5175 len = 0; /* total length of j or a array to be received */ 5176 k = 0; 5177 for (i=0; i<nrecvs; i++){ 5178 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5179 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5180 for (j=0; j<nrows; j++) { 5181 b_othi[k+1] = b_othi[k] + rowlen[j]; 5182 len += rowlen[j]; k++; 5183 } 5184 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5185 } 5186 5187 /* allocate space for j and a arrrays of B_oth */ 5188 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5189 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5190 5191 /* j-array */ 5192 /*---------*/ 5193 /* post receives of j-array */ 5194 for (i=0; i<nrecvs; i++){ 5195 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5196 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5197 } 5198 5199 /* pack the outgoing message j-array */ 5200 k = 0; 5201 for (i=0; i<nsends; i++){ 5202 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5203 bufJ = bufj+sstartsj[i]; 5204 for (j=0; j<nrows; j++) { 5205 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5206 for (ll=0; ll<sbs; ll++){ 5207 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5208 for (l=0; l<ncols; l++){ 5209 *bufJ++ = cols[l]; 5210 } 5211 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5212 } 5213 } 5214 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5215 } 5216 5217 /* recvs and sends of j-array are completed */ 5218 i = nrecvs; 5219 while (i--) { 5220 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5221 } 5222 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5223 } else if (scall == MAT_REUSE_MATRIX){ 5224 sstartsj = *startsj; 5225 rstartsj = *startsj_r; 5226 bufa = *bufa_ptr; 5227 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5228 b_otha = b_oth->a; 5229 } else { 5230 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5231 } 5232 5233 /* a-array */ 5234 /*---------*/ 5235 /* post receives of a-array */ 5236 for (i=0; i<nrecvs; i++){ 5237 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5238 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5239 } 5240 5241 /* pack the outgoing message a-array */ 5242 k = 0; 5243 for (i=0; i<nsends; i++){ 5244 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5245 bufA = bufa+sstartsj[i]; 5246 for (j=0; j<nrows; j++) { 5247 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5248 for (ll=0; ll<sbs; ll++){ 5249 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5250 for (l=0; l<ncols; l++){ 5251 *bufA++ = vals[l]; 5252 } 5253 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5254 } 5255 } 5256 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5257 } 5258 /* recvs and sends of a-array are completed */ 5259 i = nrecvs; 5260 while (i--) { 5261 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5262 } 5263 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5264 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5265 5266 if (scall == MAT_INITIAL_MATRIX){ 5267 /* put together the new matrix */ 5268 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5269 5270 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5271 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5272 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5273 b_oth->free_a = PETSC_TRUE; 5274 b_oth->free_ij = PETSC_TRUE; 5275 b_oth->nonew = 0; 5276 5277 ierr = PetscFree(bufj);CHKERRQ(ierr); 5278 if (!startsj || !bufa_ptr){ 5279 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5280 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5281 } else { 5282 *startsj = sstartsj; 5283 *startsj_r = rstartsj; 5284 *bufa_ptr = bufa; 5285 } 5286 } 5287 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5288 PetscFunctionReturn(0); 5289 } 5290 5291 #undef __FUNCT__ 5292 #define __FUNCT__ "MatGetCommunicationStructs" 5293 /*@C 5294 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5295 5296 Not Collective 5297 5298 Input Parameters: 5299 . A - The matrix in mpiaij format 5300 5301 Output Parameter: 5302 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5303 . colmap - A map from global column index to local index into lvec 5304 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5305 5306 Level: developer 5307 5308 @*/ 5309 #if defined (PETSC_USE_CTABLE) 5310 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5311 #else 5312 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5313 #endif 5314 { 5315 Mat_MPIAIJ *a; 5316 5317 PetscFunctionBegin; 5318 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5319 PetscValidPointer(lvec, 2); 5320 PetscValidPointer(colmap, 3); 5321 PetscValidPointer(multScatter, 4); 5322 a = (Mat_MPIAIJ *) A->data; 5323 if (lvec) *lvec = a->lvec; 5324 if (colmap) *colmap = a->colmap; 5325 if (multScatter) *multScatter = a->Mvctx; 5326 PetscFunctionReturn(0); 5327 } 5328 5329 EXTERN_C_BEGIN 5330 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5331 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5332 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5333 EXTERN_C_END 5334 5335 #undef __FUNCT__ 5336 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5337 /* 5338 Computes (B'*A')' since computing B*A directly is untenable 5339 5340 n p p 5341 ( ) ( ) ( ) 5342 m ( A ) * n ( B ) = m ( C ) 5343 ( ) ( ) ( ) 5344 5345 */ 5346 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5347 { 5348 PetscErrorCode ierr; 5349 Mat At,Bt,Ct; 5350 5351 PetscFunctionBegin; 5352 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5353 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5354 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5355 ierr = MatDestroy(&At);CHKERRQ(ierr); 5356 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5357 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5358 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5359 PetscFunctionReturn(0); 5360 } 5361 5362 #undef __FUNCT__ 5363 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5364 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5365 { 5366 PetscErrorCode ierr; 5367 PetscInt m=A->rmap->n,n=B->cmap->n; 5368 Mat Cmat; 5369 5370 PetscFunctionBegin; 5371 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5372 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5373 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5374 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5375 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5376 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5377 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5378 *C = Cmat; 5379 PetscFunctionReturn(0); 5380 } 5381 5382 /* ----------------------------------------------------------------*/ 5383 #undef __FUNCT__ 5384 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5385 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5386 { 5387 PetscErrorCode ierr; 5388 5389 PetscFunctionBegin; 5390 if (scall == MAT_INITIAL_MATRIX){ 5391 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5392 } 5393 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5394 PetscFunctionReturn(0); 5395 } 5396 5397 EXTERN_C_BEGIN 5398 #if defined(PETSC_HAVE_MUMPS) 5399 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5400 #endif 5401 #if defined(PETSC_HAVE_PASTIX) 5402 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5403 #endif 5404 #if defined(PETSC_HAVE_SUPERLU_DIST) 5405 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5406 #endif 5407 #if defined(PETSC_HAVE_SPOOLES) 5408 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5409 #endif 5410 EXTERN_C_END 5411 5412 /*MC 5413 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5414 5415 Options Database Keys: 5416 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5417 5418 Level: beginner 5419 5420 .seealso: MatCreateMPIAIJ() 5421 M*/ 5422 5423 EXTERN_C_BEGIN 5424 #undef __FUNCT__ 5425 #define __FUNCT__ "MatCreate_MPIAIJ" 5426 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5427 { 5428 Mat_MPIAIJ *b; 5429 PetscErrorCode ierr; 5430 PetscMPIInt size; 5431 5432 PetscFunctionBegin; 5433 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5434 5435 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5436 B->data = (void*)b; 5437 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5438 B->rmap->bs = 1; 5439 B->assembled = PETSC_FALSE; 5440 5441 B->insertmode = NOT_SET_VALUES; 5442 b->size = size; 5443 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5444 5445 /* build cache for off array entries formed */ 5446 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5447 b->donotstash = PETSC_FALSE; 5448 b->colmap = 0; 5449 b->garray = 0; 5450 b->roworiented = PETSC_TRUE; 5451 5452 /* stuff used for matrix vector multiply */ 5453 b->lvec = PETSC_NULL; 5454 b->Mvctx = PETSC_NULL; 5455 5456 /* stuff for MatGetRow() */ 5457 b->rowindices = 0; 5458 b->rowvalues = 0; 5459 b->getrowactive = PETSC_FALSE; 5460 5461 #if defined(PETSC_HAVE_SPOOLES) 5462 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5463 "MatGetFactor_mpiaij_spooles", 5464 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5465 #endif 5466 #if defined(PETSC_HAVE_MUMPS) 5467 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5468 "MatGetFactor_aij_mumps", 5469 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5470 #endif 5471 #if defined(PETSC_HAVE_PASTIX) 5472 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5473 "MatGetFactor_mpiaij_pastix", 5474 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5475 #endif 5476 #if defined(PETSC_HAVE_SUPERLU_DIST) 5477 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5478 "MatGetFactor_mpiaij_superlu_dist", 5479 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5480 #endif 5481 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5482 "MatStoreValues_MPIAIJ", 5483 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5484 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5485 "MatRetrieveValues_MPIAIJ", 5486 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5487 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5488 "MatGetDiagonalBlock_MPIAIJ", 5489 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5490 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5491 "MatIsTranspose_MPIAIJ", 5492 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5493 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5494 "MatMPIAIJSetPreallocation_MPIAIJ", 5495 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5496 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5497 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5498 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5499 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5500 "MatDiagonalScaleLocal_MPIAIJ", 5501 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5502 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5503 "MatConvert_MPIAIJ_MPIAIJPERM", 5504 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5505 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5506 "MatConvert_MPIAIJ_MPIAIJCRL", 5507 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5508 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5509 "MatConvert_MPIAIJ_MPISBAIJ", 5510 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5511 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5512 "MatMatMult_MPIDense_MPIAIJ", 5513 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5514 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5515 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5516 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5517 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5518 "MatMatMultNumeric_MPIDense_MPIAIJ", 5519 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5520 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5521 PetscFunctionReturn(0); 5522 } 5523 EXTERN_C_END 5524 5525 #undef __FUNCT__ 5526 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5527 /*@ 5528 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5529 and "off-diagonal" part of the matrix in CSR format. 5530 5531 Collective on MPI_Comm 5532 5533 Input Parameters: 5534 + comm - MPI communicator 5535 . m - number of local rows (Cannot be PETSC_DECIDE) 5536 . n - This value should be the same as the local size used in creating the 5537 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5538 calculated if N is given) For square matrices n is almost always m. 5539 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5540 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5541 . i - row indices for "diagonal" portion of matrix 5542 . j - column indices 5543 . a - matrix values 5544 . oi - row indices for "off-diagonal" portion of matrix 5545 . oj - column indices 5546 - oa - matrix values 5547 5548 Output Parameter: 5549 . mat - the matrix 5550 5551 Level: advanced 5552 5553 Notes: 5554 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5555 must free the arrays once the matrix has been destroyed and not before. 5556 5557 The i and j indices are 0 based 5558 5559 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5560 5561 This sets local rows and cannot be used to set off-processor values. 5562 5563 You cannot later use MatSetValues() to change values in this matrix. 5564 5565 .keywords: matrix, aij, compressed row, sparse, parallel 5566 5567 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5568 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5569 @*/ 5570 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5571 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5572 { 5573 PetscErrorCode ierr; 5574 Mat_MPIAIJ *maij; 5575 5576 PetscFunctionBegin; 5577 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5578 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5579 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5580 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5581 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5582 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5583 maij = (Mat_MPIAIJ*) (*mat)->data; 5584 maij->donotstash = PETSC_TRUE; 5585 (*mat)->preallocated = PETSC_TRUE; 5586 5587 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5588 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5589 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5590 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5591 5592 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5593 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5594 5595 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5596 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5597 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5598 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5599 5600 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5601 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5602 PetscFunctionReturn(0); 5603 } 5604 5605 /* 5606 Special version for direct calls from Fortran 5607 */ 5608 #include <private/fortranimpl.h> 5609 5610 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5611 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5612 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5613 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5614 #endif 5615 5616 /* Change these macros so can be used in void function */ 5617 #undef CHKERRQ 5618 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5619 #undef SETERRQ2 5620 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5621 #undef SETERRQ 5622 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5623 5624 EXTERN_C_BEGIN 5625 #undef __FUNCT__ 5626 #define __FUNCT__ "matsetvaluesmpiaij_" 5627 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5628 { 5629 Mat mat = *mmat; 5630 PetscInt m = *mm, n = *mn; 5631 InsertMode addv = *maddv; 5632 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5633 PetscScalar value; 5634 PetscErrorCode ierr; 5635 5636 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5637 if (mat->insertmode == NOT_SET_VALUES) { 5638 mat->insertmode = addv; 5639 } 5640 #if defined(PETSC_USE_DEBUG) 5641 else if (mat->insertmode != addv) { 5642 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5643 } 5644 #endif 5645 { 5646 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5647 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5648 PetscBool roworiented = aij->roworiented; 5649 5650 /* Some Variables required in the macro */ 5651 Mat A = aij->A; 5652 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5653 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5654 MatScalar *aa = a->a; 5655 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5656 Mat B = aij->B; 5657 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5658 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5659 MatScalar *ba = b->a; 5660 5661 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5662 PetscInt nonew = a->nonew; 5663 MatScalar *ap1,*ap2; 5664 5665 PetscFunctionBegin; 5666 for (i=0; i<m; i++) { 5667 if (im[i] < 0) continue; 5668 #if defined(PETSC_USE_DEBUG) 5669 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5670 #endif 5671 if (im[i] >= rstart && im[i] < rend) { 5672 row = im[i] - rstart; 5673 lastcol1 = -1; 5674 rp1 = aj + ai[row]; 5675 ap1 = aa + ai[row]; 5676 rmax1 = aimax[row]; 5677 nrow1 = ailen[row]; 5678 low1 = 0; 5679 high1 = nrow1; 5680 lastcol2 = -1; 5681 rp2 = bj + bi[row]; 5682 ap2 = ba + bi[row]; 5683 rmax2 = bimax[row]; 5684 nrow2 = bilen[row]; 5685 low2 = 0; 5686 high2 = nrow2; 5687 5688 for (j=0; j<n; j++) { 5689 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5690 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5691 if (in[j] >= cstart && in[j] < cend){ 5692 col = in[j] - cstart; 5693 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5694 } else if (in[j] < 0) continue; 5695 #if defined(PETSC_USE_DEBUG) 5696 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5697 #endif 5698 else { 5699 if (mat->was_assembled) { 5700 if (!aij->colmap) { 5701 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5702 } 5703 #if defined (PETSC_USE_CTABLE) 5704 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5705 col--; 5706 #else 5707 col = aij->colmap[in[j]] - 1; 5708 #endif 5709 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5710 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5711 col = in[j]; 5712 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5713 B = aij->B; 5714 b = (Mat_SeqAIJ*)B->data; 5715 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5716 rp2 = bj + bi[row]; 5717 ap2 = ba + bi[row]; 5718 rmax2 = bimax[row]; 5719 nrow2 = bilen[row]; 5720 low2 = 0; 5721 high2 = nrow2; 5722 bm = aij->B->rmap->n; 5723 ba = b->a; 5724 } 5725 } else col = in[j]; 5726 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5727 } 5728 } 5729 } else { 5730 if (!aij->donotstash) { 5731 if (roworiented) { 5732 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5733 } else { 5734 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5735 } 5736 } 5737 } 5738 }} 5739 PetscFunctionReturnVoid(); 5740 } 5741 EXTERN_C_END 5742 5743