1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 138 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 139 if (type == NORM_2) { 140 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 141 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 142 } 143 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 144 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 145 } 146 } else if (type == NORM_1) { 147 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 148 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 149 } 150 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 151 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 152 } 153 } else if (type == NORM_INFINITY) { 154 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 155 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 156 } 157 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 158 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 159 } 160 161 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 162 if (type == NORM_INFINITY) { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 164 } else { 165 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 166 } 167 ierr = PetscFree(work);CHKERRQ(ierr); 168 if (type == NORM_2) { 169 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "MatDistribute_MPIAIJ" 176 /* 177 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 178 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 179 180 Only for square matrices 181 182 Used by a preconditioner, hence PETSC_EXTERN 183 */ 184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 185 { 186 PetscMPIInt rank,size; 187 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 188 PetscErrorCode ierr; 189 Mat mat; 190 Mat_SeqAIJ *gmata; 191 PetscMPIInt tag; 192 MPI_Status status; 193 PetscBool aij; 194 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 195 196 PetscFunctionBegin; 197 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 199 if (!rank) { 200 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 201 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 202 } 203 if (reuse == MAT_INITIAL_MATRIX) { 204 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 205 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 206 if (!rank) { 207 bses[0] = gmat->rmap->bs; 208 bses[1] = gmat->cmap->bs; 209 } 210 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 211 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 212 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 213 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 214 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 215 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 216 217 rowners[0] = 0; 218 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 219 rstart = rowners[rank]; 220 rend = rowners[rank+1]; 221 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 222 if (!rank) { 223 gmata = (Mat_SeqAIJ*) gmat->data; 224 /* send row lengths to all processors */ 225 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 226 for (i=1; i<size; i++) { 227 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 228 } 229 /* determine number diagonal and off-diagonal counts */ 230 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 231 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 232 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 233 jj = 0; 234 for (i=0; i<m; i++) { 235 for (j=0; j<dlens[i]; j++) { 236 if (gmata->j[jj] < rstart) ld[i]++; 237 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 238 jj++; 239 } 240 } 241 /* send column indices to other processes */ 242 for (i=1; i<size; i++) { 243 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 244 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 245 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 246 } 247 248 /* send numerical values to other processes */ 249 for (i=1; i<size; i++) { 250 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 251 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 252 } 253 gmataa = gmata->a; 254 gmataj = gmata->j; 255 256 } else { 257 /* receive row lengths */ 258 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 /* receive column indices */ 260 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 262 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 263 /* determine number diagonal and off-diagonal counts */ 264 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 266 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 267 jj = 0; 268 for (i=0; i<m; i++) { 269 for (j=0; j<dlens[i]; j++) { 270 if (gmataj[jj] < rstart) ld[i]++; 271 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 272 jj++; 273 } 274 } 275 /* receive numerical values */ 276 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 277 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 278 } 279 /* set preallocation */ 280 for (i=0; i<m; i++) { 281 dlens[i] -= olens[i]; 282 } 283 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 284 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 285 286 for (i=0; i<m; i++) { 287 dlens[i] += olens[i]; 288 } 289 cnt = 0; 290 for (i=0; i<m; i++) { 291 row = rstart + i; 292 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 293 cnt += dlens[i]; 294 } 295 if (rank) { 296 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 297 } 298 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 299 ierr = PetscFree(rowners);CHKERRQ(ierr); 300 301 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 302 303 *inmat = mat; 304 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 307 mat = *inmat; 308 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 309 if (!rank) { 310 /* send numerical values to other processes */ 311 gmata = (Mat_SeqAIJ*) gmat->data; 312 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 313 gmataa = gmata->a; 314 for (i=1; i<size; i++) { 315 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 316 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 317 } 318 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 319 } else { 320 /* receive numerical values from process 0*/ 321 nz = Ad->nz + Ao->nz; 322 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 326 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 327 ad = Ad->a; 328 ao = Ao->a; 329 if (mat->rmap->n) { 330 i = 0; 331 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 for (i=1; i<mat->rmap->n; i++) { 335 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 336 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 337 } 338 i--; 339 if (mat->rmap->n) { 340 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 341 } 342 if (rank) { 343 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 344 } 345 } 346 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 /* 352 Local utility routine that creates a mapping from the global column 353 number to the local number in the off-diagonal part of the local 354 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 355 a slightly higher hash table cost; without it it is not scalable (each processor 356 has an order N integer array but is fast to acess. 357 */ 358 #undef __FUNCT__ 359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 361 { 362 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 363 PetscErrorCode ierr; 364 PetscInt n = aij->B->cmap->n,i; 365 366 PetscFunctionBegin; 367 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 368 #if defined(PETSC_USE_CTABLE) 369 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 370 for (i=0; i<n; i++) { 371 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 372 } 373 #else 374 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 375 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 376 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 377 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 378 #endif 379 PetscFunctionReturn(0); 380 } 381 382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 383 { \ 384 if (col <= lastcol1) low1 = 0; \ 385 else high1 = nrow1; \ 386 lastcol1 = col;\ 387 while (high1-low1 > 5) { \ 388 t = (low1+high1)/2; \ 389 if (rp1[t] > col) high1 = t; \ 390 else low1 = t; \ 391 } \ 392 for (_i=low1; _i<high1; _i++) { \ 393 if (rp1[_i] > col) break; \ 394 if (rp1[_i] == col) { \ 395 if (addv == ADD_VALUES) ap1[_i] += value; \ 396 else ap1[_i] = value; \ 397 goto a_noinsert; \ 398 } \ 399 } \ 400 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 401 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 402 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 403 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 404 N = nrow1++ - 1; a->nz++; high1++; \ 405 /* shift up all the later entries in this row */ \ 406 for (ii=N; ii>=_i; ii--) { \ 407 rp1[ii+1] = rp1[ii]; \ 408 ap1[ii+1] = ap1[ii]; \ 409 } \ 410 rp1[_i] = col; \ 411 ap1[_i] = value; \ 412 a_noinsert: ; \ 413 ailen[row] = nrow1; \ 414 } 415 416 417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 418 { \ 419 if (col <= lastcol2) low2 = 0; \ 420 else high2 = nrow2; \ 421 lastcol2 = col; \ 422 while (high2-low2 > 5) { \ 423 t = (low2+high2)/2; \ 424 if (rp2[t] > col) high2 = t; \ 425 else low2 = t; \ 426 } \ 427 for (_i=low2; _i<high2; _i++) { \ 428 if (rp2[_i] > col) break; \ 429 if (rp2[_i] == col) { \ 430 if (addv == ADD_VALUES) ap2[_i] += value; \ 431 else ap2[_i] = value; \ 432 goto b_noinsert; \ 433 } \ 434 } \ 435 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 436 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 437 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 438 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 439 N = nrow2++ - 1; b->nz++; high2++; \ 440 /* shift up all the later entries in this row */ \ 441 for (ii=N; ii>=_i; ii--) { \ 442 rp2[ii+1] = rp2[ii]; \ 443 ap2[ii+1] = ap2[ii]; \ 444 } \ 445 rp2[_i] = col; \ 446 ap2[_i] = value; \ 447 b_noinsert: ; \ 448 bilen[row] = nrow2; \ 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 454 { 455 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 456 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 457 PetscErrorCode ierr; 458 PetscInt l,*garray = mat->garray,diag; 459 460 PetscFunctionBegin; 461 /* code only works for square matrices A */ 462 463 /* find size of row to the left of the diagonal part */ 464 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 465 row = row - diag; 466 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 467 if (garray[b->j[b->i[row]+l]] > diag) break; 468 } 469 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 470 471 /* diagonal part */ 472 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 473 474 /* right of diagonal part */ 475 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatSetValues_MPIAIJ" 481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 PetscScalar value; 485 PetscErrorCode ierr; 486 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 487 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 488 PetscBool roworiented = aij->roworiented; 489 490 /* Some Variables required in the macro */ 491 Mat A = aij->A; 492 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 493 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 494 MatScalar *aa = a->a; 495 PetscBool ignorezeroentries = a->ignorezeroentries; 496 Mat B = aij->B; 497 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 498 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 499 MatScalar *ba = b->a; 500 501 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 502 PetscInt nonew; 503 MatScalar *ap1,*ap2; 504 505 PetscFunctionBegin; 506 if (v) PetscValidScalarPointer(v,6); 507 for (i=0; i<m; i++) { 508 if (im[i] < 0) continue; 509 #if defined(PETSC_USE_DEBUG) 510 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 511 #endif 512 if (im[i] >= rstart && im[i] < rend) { 513 row = im[i] - rstart; 514 lastcol1 = -1; 515 rp1 = aj + ai[row]; 516 ap1 = aa + ai[row]; 517 rmax1 = aimax[row]; 518 nrow1 = ailen[row]; 519 low1 = 0; 520 high1 = nrow1; 521 lastcol2 = -1; 522 rp2 = bj + bi[row]; 523 ap2 = ba + bi[row]; 524 rmax2 = bimax[row]; 525 nrow2 = bilen[row]; 526 low2 = 0; 527 high2 = nrow2; 528 529 for (j=0; j<n; j++) { 530 if (v) { 531 if (roworiented) value = v[i*n+j]; 532 else value = v[i+j*m]; 533 } else value = 0.0; 534 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 535 if (in[j] >= cstart && in[j] < cend) { 536 col = in[j] - cstart; 537 nonew = a->nonew; 538 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 539 } else if (in[j] < 0) continue; 540 #if defined(PETSC_USE_DEBUG) 541 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 542 #endif 543 else { 544 if (mat->was_assembled) { 545 if (!aij->colmap) { 546 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 547 } 548 #if defined(PETSC_USE_CTABLE) 549 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 550 col--; 551 #else 552 col = aij->colmap[in[j]] - 1; 553 #endif 554 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 555 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 556 col = in[j]; 557 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 558 B = aij->B; 559 b = (Mat_SeqAIJ*)B->data; 560 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 561 rp2 = bj + bi[row]; 562 ap2 = ba + bi[row]; 563 rmax2 = bimax[row]; 564 nrow2 = bilen[row]; 565 low2 = 0; 566 high2 = nrow2; 567 bm = aij->B->rmap->n; 568 ba = b->a; 569 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 570 } else col = in[j]; 571 nonew = b->nonew; 572 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 573 } 574 } 575 } else { 576 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 577 if (!aij->donotstash) { 578 mat->assembled = PETSC_FALSE; 579 if (roworiented) { 580 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 581 } else { 582 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 583 } 584 } 585 } 586 } 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatGetValues_MPIAIJ" 592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 593 { 594 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 595 PetscErrorCode ierr; 596 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 597 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 598 599 PetscFunctionBegin; 600 for (i=0; i<m; i++) { 601 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 602 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 603 if (idxm[i] >= rstart && idxm[i] < rend) { 604 row = idxm[i] - rstart; 605 for (j=0; j<n; j++) { 606 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 607 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 608 if (idxn[j] >= cstart && idxn[j] < cend) { 609 col = idxn[j] - cstart; 610 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 611 } else { 612 if (!aij->colmap) { 613 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 614 } 615 #if defined(PETSC_USE_CTABLE) 616 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 617 col--; 618 #else 619 col = aij->colmap[idxn[j]] - 1; 620 #endif 621 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 622 else { 623 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 624 } 625 } 626 } 627 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 628 } 629 PetscFunctionReturn(0); 630 } 631 632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt nstash,reallocs; 641 InsertMode addv; 642 643 PetscFunctionBegin; 644 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 645 646 /* make sure all processors are either in INSERTMODE or ADDMODE */ 647 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 648 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 649 mat->insertmode = addv; /* in case this processor had no cache */ 650 651 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 652 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 653 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 660 { 661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 663 PetscErrorCode ierr; 664 PetscMPIInt n; 665 PetscInt i,j,rstart,ncols,flg; 666 PetscInt *row,*col; 667 PetscBool other_disassembled; 668 PetscScalar *val; 669 InsertMode addv = mat->insertmode; 670 671 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 672 673 PetscFunctionBegin; 674 if (!aij->donotstash && !mat->nooffprocentries) { 675 while (1) { 676 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 677 if (!flg) break; 678 679 for (i=0; i<n; ) { 680 /* Now identify the consecutive vals belonging to the same row */ 681 for (j=i,rstart=row[j]; j<n; j++) { 682 if (row[j] != rstart) break; 683 } 684 if (j < n) ncols = j-i; 685 else ncols = n-i; 686 /* Now assemble all these values with a single function call */ 687 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 688 689 i = j; 690 } 691 } 692 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 693 } 694 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 695 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 696 697 /* determine if any processor has disassembled, if so we must 698 also disassemble ourselfs, in order that we may reassemble. */ 699 /* 700 if nonzero structure of submatrix B cannot change then we know that 701 no processor disassembled thus we can skip this stuff 702 */ 703 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 704 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 705 if (mat->was_assembled && !other_disassembled) { 706 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 707 } 708 } 709 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 710 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 711 } 712 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 713 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 714 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 715 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 716 717 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 718 719 aij->rowvalues = 0; 720 721 /* used by MatAXPY() */ 722 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 723 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 724 725 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 726 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 733 { 734 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 739 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatZeroRows_MPIAIJ" 745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 746 { 747 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 748 PetscErrorCode ierr; 749 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 750 PetscInt i,*owners = A->rmap->range; 751 PetscInt *nprocs,j,idx,nsends,row; 752 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 753 PetscInt *rvalues,count,base,slen,*source; 754 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 755 MPI_Comm comm; 756 MPI_Request *send_waits,*recv_waits; 757 MPI_Status recv_status,*send_status; 758 const PetscScalar *xx; 759 PetscScalar *bb; 760 #if defined(PETSC_DEBUG) 761 PetscBool found = PETSC_FALSE; 762 #endif 763 764 PetscFunctionBegin; 765 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 766 /* first count number of contributors to each processor */ 767 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 768 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 769 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 770 j = 0; 771 for (i=0; i<N; i++) { 772 if (lastidx > (idx = rows[i])) j = 0; 773 lastidx = idx; 774 for (; j<size; j++) { 775 if (idx >= owners[j] && idx < owners[j+1]) { 776 nprocs[2*j]++; 777 nprocs[2*j+1] = 1; 778 owner[i] = j; 779 #if defined(PETSC_DEBUG) 780 found = PETSC_TRUE; 781 #endif 782 break; 783 } 784 } 785 #if defined(PETSC_DEBUG) 786 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 787 found = PETSC_FALSE; 788 #endif 789 } 790 nsends = 0; 791 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 792 793 if (A->nooffproczerorows) { 794 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 795 nrecvs = nsends; 796 nmax = N; 797 } else { 798 /* inform other processors of number of messages and max length*/ 799 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 800 } 801 802 /* post receives: */ 803 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 804 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 805 for (i=0; i<nrecvs; i++) { 806 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 807 } 808 809 /* do sends: 810 1) starts[i] gives the starting index in svalues for stuff going to 811 the ith processor 812 */ 813 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 814 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 815 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 816 817 starts[0] = 0; 818 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 819 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 820 821 starts[0] = 0; 822 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 823 count = 0; 824 for (i=0; i<size; i++) { 825 if (nprocs[2*i+1]) { 826 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 827 } 828 } 829 ierr = PetscFree(starts);CHKERRQ(ierr); 830 831 base = owners[rank]; 832 833 /* wait on receives */ 834 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 835 count = nrecvs; slen = 0; 836 while (count) { 837 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 838 /* unpack receives into our local space */ 839 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 840 841 source[imdex] = recv_status.MPI_SOURCE; 842 lens[imdex] = n; 843 slen += n; 844 count--; 845 } 846 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 847 848 /* move the data into the send scatter */ 849 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 850 count = 0; 851 for (i=0; i<nrecvs; i++) { 852 values = rvalues + i*nmax; 853 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 854 } 855 ierr = PetscFree(rvalues);CHKERRQ(ierr); 856 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 857 ierr = PetscFree(owner);CHKERRQ(ierr); 858 ierr = PetscFree(nprocs);CHKERRQ(ierr); 859 860 /* fix right hand side if needed */ 861 if (x && b) { 862 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 863 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 864 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 865 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 866 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 867 } 868 /* 869 Zero the required rows. If the "diagonal block" of the matrix 870 is square and the user wishes to set the diagonal we use separate 871 code so that MatSetValues() is not called for each diagonal allocating 872 new memory, thus calling lots of mallocs and slowing things down. 873 874 */ 875 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 876 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 877 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 878 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 879 } else if (diag != 0.0) { 880 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 881 if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 882 for (i = 0; i < slen; i++) { 883 row = lrows[i] + rstart; 884 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 885 } 886 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 887 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 } else { 889 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 890 } 891 ierr = PetscFree(lrows);CHKERRQ(ierr); 892 893 /* wait on sends */ 894 if (nsends) { 895 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 896 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 897 ierr = PetscFree(send_status);CHKERRQ(ierr); 898 } 899 ierr = PetscFree(send_waits);CHKERRQ(ierr); 900 ierr = PetscFree(svalues);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 907 { 908 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 909 PetscErrorCode ierr; 910 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 911 PetscInt i,*owners = A->rmap->range; 912 PetscInt *nprocs,j,idx,nsends; 913 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 914 PetscInt *rvalues,count,base,slen,*source; 915 PetscInt *lens,*lrows,*values,m; 916 MPI_Comm comm; 917 MPI_Request *send_waits,*recv_waits; 918 MPI_Status recv_status,*send_status; 919 const PetscScalar *xx; 920 PetscScalar *bb,*mask; 921 Vec xmask,lmask; 922 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 923 const PetscInt *aj, *ii,*ridx; 924 PetscScalar *aa; 925 #if defined(PETSC_DEBUG) 926 PetscBool found = PETSC_FALSE; 927 #endif 928 929 PetscFunctionBegin; 930 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 931 /* first count number of contributors to each processor */ 932 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 933 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 934 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 935 j = 0; 936 for (i=0; i<N; i++) { 937 if (lastidx > (idx = rows[i])) j = 0; 938 lastidx = idx; 939 for (; j<size; j++) { 940 if (idx >= owners[j] && idx < owners[j+1]) { 941 nprocs[2*j]++; 942 nprocs[2*j+1] = 1; 943 owner[i] = j; 944 #if defined(PETSC_DEBUG) 945 found = PETSC_TRUE; 946 #endif 947 break; 948 } 949 } 950 #if defined(PETSC_DEBUG) 951 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 952 found = PETSC_FALSE; 953 #endif 954 } 955 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 956 957 /* inform other processors of number of messages and max length*/ 958 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 959 960 /* post receives: */ 961 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 962 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 963 for (i=0; i<nrecvs; i++) { 964 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 965 } 966 967 /* do sends: 968 1) starts[i] gives the starting index in svalues for stuff going to 969 the ith processor 970 */ 971 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 972 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 973 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 974 975 starts[0] = 0; 976 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 977 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 978 979 starts[0] = 0; 980 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 981 count = 0; 982 for (i=0; i<size; i++) { 983 if (nprocs[2*i+1]) { 984 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 985 } 986 } 987 ierr = PetscFree(starts);CHKERRQ(ierr); 988 989 base = owners[rank]; 990 991 /* wait on receives */ 992 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 993 count = nrecvs; slen = 0; 994 while (count) { 995 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 996 /* unpack receives into our local space */ 997 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 998 999 source[imdex] = recv_status.MPI_SOURCE; 1000 lens[imdex] = n; 1001 slen += n; 1002 count--; 1003 } 1004 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1005 1006 /* move the data into the send scatter */ 1007 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1008 count = 0; 1009 for (i=0; i<nrecvs; i++) { 1010 values = rvalues + i*nmax; 1011 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 1012 } 1013 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1014 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1015 ierr = PetscFree(owner);CHKERRQ(ierr); 1016 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1017 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1018 1019 /* zero diagonal part of matrix */ 1020 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1021 1022 /* handle off diagonal part of matrix */ 1023 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1024 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1025 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1026 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 1027 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1028 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1031 if (x) { 1032 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1035 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1036 } 1037 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1038 1039 /* remove zeroed rows of off diagonal matrix */ 1040 ii = aij->i; 1041 for (i=0; i<slen; i++) { 1042 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1043 } 1044 1045 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1046 if (aij->compressedrow.use) { 1047 m = aij->compressedrow.nrows; 1048 ii = aij->compressedrow.i; 1049 ridx = aij->compressedrow.rindex; 1050 for (i=0; i<m; i++) { 1051 n = ii[i+1] - ii[i]; 1052 aj = aij->j + ii[i]; 1053 aa = aij->a + ii[i]; 1054 1055 for (j=0; j<n; j++) { 1056 if (PetscAbsScalar(mask[*aj])) { 1057 if (b) bb[*ridx] -= *aa*xx[*aj]; 1058 *aa = 0.0; 1059 } 1060 aa++; 1061 aj++; 1062 } 1063 ridx++; 1064 } 1065 } else { /* do not use compressed row format */ 1066 m = l->B->rmap->n; 1067 for (i=0; i<m; i++) { 1068 n = ii[i+1] - ii[i]; 1069 aj = aij->j + ii[i]; 1070 aa = aij->a + ii[i]; 1071 for (j=0; j<n; j++) { 1072 if (PetscAbsScalar(mask[*aj])) { 1073 if (b) bb[i] -= *aa*xx[*aj]; 1074 *aa = 0.0; 1075 } 1076 aa++; 1077 aj++; 1078 } 1079 } 1080 } 1081 if (x) { 1082 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1083 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1084 } 1085 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1086 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1087 ierr = PetscFree(lrows);CHKERRQ(ierr); 1088 1089 /* wait on sends */ 1090 if (nsends) { 1091 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1092 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1093 ierr = PetscFree(send_status);CHKERRQ(ierr); 1094 } 1095 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1096 ierr = PetscFree(svalues);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatMult_MPIAIJ" 1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1103 { 1104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 PetscInt nt; 1107 1108 PetscFunctionBegin; 1109 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1110 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 PetscBool merged; 1152 1153 PetscFunctionBegin; 1154 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1155 /* do nondiagonal part */ 1156 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1157 if (!merged) { 1158 /* send it on its way */ 1159 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1160 /* do local part */ 1161 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1162 /* receive remote parts: note this assumes the values are not actually */ 1163 /* added in yy until the next line, */ 1164 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1165 } else { 1166 /* do local part */ 1167 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1168 /* send it on its way */ 1169 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1170 /* values actually were received in the Begin() but we need to call this nop */ 1171 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1172 } 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1178 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1179 { 1180 MPI_Comm comm; 1181 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1182 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1183 IS Me,Notme; 1184 PetscErrorCode ierr; 1185 PetscInt M,N,first,last,*notme,i; 1186 PetscMPIInt size; 1187 1188 PetscFunctionBegin; 1189 /* Easy test: symmetric diagonal block */ 1190 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1191 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1192 if (!*f) PetscFunctionReturn(0); 1193 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1194 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1195 if (size == 1) PetscFunctionReturn(0); 1196 1197 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1198 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1199 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1200 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1201 for (i=0; i<first; i++) notme[i] = i; 1202 for (i=last; i<M; i++) notme[i-last+first] = i; 1203 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1204 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1205 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1206 Aoff = Aoffs[0]; 1207 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1208 Boff = Boffs[0]; 1209 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1210 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1211 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1212 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1213 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1214 ierr = PetscFree(notme);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 #undef __FUNCT__ 1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1221 { 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1223 PetscErrorCode ierr; 1224 1225 PetscFunctionBegin; 1226 /* do nondiagonal part */ 1227 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1228 /* send it on its way */ 1229 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1230 /* do local part */ 1231 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1232 /* receive remote parts */ 1233 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 /* 1238 This only works correctly for square matrices where the subblock A->A is the 1239 diagonal block 1240 */ 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1244 { 1245 PetscErrorCode ierr; 1246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1247 1248 PetscFunctionBegin; 1249 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1250 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1251 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatScale_MPIAIJ" 1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1258 { 1259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1260 PetscErrorCode ierr; 1261 1262 PetscFunctionBegin; 1263 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1264 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatDestroy_MPIAIJ" 1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1271 { 1272 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 #if defined(PETSC_USE_LOG) 1277 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1278 #endif 1279 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1280 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1281 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1282 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1283 #if defined(PETSC_USE_CTABLE) 1284 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1285 #else 1286 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1287 #endif 1288 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1289 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1290 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1291 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1292 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1293 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1294 1295 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1310 { 1311 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1312 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1313 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1314 PetscErrorCode ierr; 1315 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1316 int fd; 1317 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1318 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1319 PetscScalar *column_values; 1320 PetscInt message_count,flowcontrolcount; 1321 FILE *file; 1322 1323 PetscFunctionBegin; 1324 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1325 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1326 nz = A->nz + B->nz; 1327 if (!rank) { 1328 header[0] = MAT_FILE_CLASSID; 1329 header[1] = mat->rmap->N; 1330 header[2] = mat->cmap->N; 1331 1332 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1333 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1334 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 /* get largest number of rows any processor has */ 1336 rlen = mat->rmap->n; 1337 range = mat->rmap->range; 1338 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1339 } else { 1340 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1341 rlen = mat->rmap->n; 1342 } 1343 1344 /* load up the local row counts */ 1345 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1346 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1347 1348 /* store the row lengths to the file */ 1349 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1350 if (!rank) { 1351 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1352 for (i=1; i<size; i++) { 1353 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1354 rlen = range[i+1] - range[i]; 1355 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1356 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1357 } 1358 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1359 } else { 1360 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1361 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1362 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1363 } 1364 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1365 1366 /* load up the local column indices */ 1367 nzmax = nz; /* th processor needs space a largest processor needs */ 1368 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1369 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1370 cnt = 0; 1371 for (i=0; i<mat->rmap->n; i++) { 1372 for (j=B->i[i]; j<B->i[i+1]; j++) { 1373 if ((col = garray[B->j[j]]) > cstart) break; 1374 column_indices[cnt++] = col; 1375 } 1376 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1377 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1378 } 1379 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1380 1381 /* store the column indices to the file */ 1382 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1383 if (!rank) { 1384 MPI_Status status; 1385 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1386 for (i=1; i<size; i++) { 1387 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1388 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1389 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1390 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1391 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1392 } 1393 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1394 } else { 1395 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1396 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1397 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1398 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1399 } 1400 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1401 1402 /* load up the local column values */ 1403 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1404 cnt = 0; 1405 for (i=0; i<mat->rmap->n; i++) { 1406 for (j=B->i[i]; j<B->i[i+1]; j++) { 1407 if (garray[B->j[j]] > cstart) break; 1408 column_values[cnt++] = B->a[j]; 1409 } 1410 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1411 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1412 } 1413 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1414 1415 /* store the column values to the file */ 1416 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1417 if (!rank) { 1418 MPI_Status status; 1419 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1420 for (i=1; i<size; i++) { 1421 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1422 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1423 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1424 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1425 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1426 } 1427 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1428 } else { 1429 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1430 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1431 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1432 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1433 } 1434 ierr = PetscFree(column_values);CHKERRQ(ierr); 1435 1436 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1437 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #include <petscdraw.h> 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1445 { 1446 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1447 PetscErrorCode ierr; 1448 PetscMPIInt rank = aij->rank,size = aij->size; 1449 PetscBool isdraw,iascii,isbinary; 1450 PetscViewer sviewer; 1451 PetscViewerFormat format; 1452 1453 PetscFunctionBegin; 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1457 if (iascii) { 1458 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1459 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1460 MatInfo info; 1461 PetscBool inodes; 1462 1463 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1464 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1465 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1467 if (!inodes) { 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1469 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1472 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1473 } 1474 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1475 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1476 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1478 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1480 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1481 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1484 PetscInt inodecount,inodelimit,*inodes; 1485 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1486 if (inodes) { 1487 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1488 } else { 1489 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1490 } 1491 PetscFunctionReturn(0); 1492 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1493 PetscFunctionReturn(0); 1494 } 1495 } else if (isbinary) { 1496 if (size == 1) { 1497 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1499 } else { 1500 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } else if (isdraw) { 1504 PetscDraw draw; 1505 PetscBool isnull; 1506 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1507 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1508 } 1509 1510 if (size == 1) { 1511 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1512 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1513 } else { 1514 /* assemble the entire matrix onto first processor. */ 1515 Mat A; 1516 Mat_SeqAIJ *Aloc; 1517 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1518 MatScalar *a; 1519 1520 if (mat->rmap->N > 1024) { 1521 PetscBool flg = PETSC_FALSE; 1522 1523 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1524 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1525 } 1526 1527 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1528 if (!rank) { 1529 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1530 } else { 1531 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1532 } 1533 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1534 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1535 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1536 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1537 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1538 1539 /* copy over the A part */ 1540 Aloc = (Mat_SeqAIJ*)aij->A->data; 1541 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1542 row = mat->rmap->rstart; 1543 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1544 for (i=0; i<m; i++) { 1545 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1546 row++; 1547 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1548 } 1549 aj = Aloc->j; 1550 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1551 1552 /* copy over the B part */ 1553 Aloc = (Mat_SeqAIJ*)aij->B->data; 1554 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1555 row = mat->rmap->rstart; 1556 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1557 ct = cols; 1558 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1559 for (i=0; i<m; i++) { 1560 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1561 row++; 1562 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1563 } 1564 ierr = PetscFree(ct);CHKERRQ(ierr); 1565 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 /* 1568 Everyone has to call to draw the matrix since the graphics waits are 1569 synchronized across all processors that share the PetscDraw object 1570 */ 1571 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1572 if (!rank) { 1573 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1574 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1575 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1576 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1577 } 1578 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1579 ierr = MatDestroy(&A);CHKERRQ(ierr); 1580 } 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatView_MPIAIJ" 1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1587 { 1588 PetscErrorCode ierr; 1589 PetscBool iascii,isdraw,issocket,isbinary; 1590 1591 PetscFunctionBegin; 1592 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1596 if (iascii || isdraw || isbinary || issocket) { 1597 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MatSOR_MPIAIJ" 1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1605 { 1606 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1607 PetscErrorCode ierr; 1608 Vec bb1 = 0; 1609 PetscBool hasop; 1610 1611 PetscFunctionBegin; 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1618 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1619 } 1620 1621 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1622 if (flag & SOR_ZERO_INITIAL_GUESS) { 1623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1624 its--; 1625 } 1626 1627 while (its--) { 1628 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 1631 /* update rhs: bb1 = bb - B*x */ 1632 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1633 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1634 1635 /* local sweep */ 1636 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1637 } 1638 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1639 if (flag & SOR_ZERO_INITIAL_GUESS) { 1640 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1641 its--; 1642 } 1643 while (its--) { 1644 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 1647 /* update rhs: bb1 = bb - B*x */ 1648 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1649 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1650 1651 /* local sweep */ 1652 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1653 } 1654 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1655 if (flag & SOR_ZERO_INITIAL_GUESS) { 1656 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1657 its--; 1658 } 1659 while (its--) { 1660 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 1663 /* update rhs: bb1 = bb - B*x */ 1664 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1665 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1666 1667 /* local sweep */ 1668 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1669 } 1670 } else if (flag & SOR_EISENSTAT) { 1671 Vec xx1; 1672 1673 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1675 1676 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 if (!mat->diag) { 1679 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1680 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1681 } 1682 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1683 if (hasop) { 1684 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1685 } else { 1686 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1687 } 1688 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1689 1690 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1691 1692 /* local sweep */ 1693 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1694 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1695 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1696 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1697 1698 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatPermute_MPIAIJ" 1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1705 { 1706 Mat aA,aB,Aperm; 1707 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1708 PetscScalar *aa,*ba; 1709 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1710 PetscSF rowsf,sf; 1711 IS parcolp = NULL; 1712 PetscBool done; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1717 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1718 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1719 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1720 1721 /* Invert row permutation to find out where my rows should go */ 1722 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1723 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1724 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1725 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1726 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1727 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1728 1729 /* Invert column permutation to find out where my columns should go */ 1730 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1731 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1732 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1733 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1734 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1735 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1736 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1737 1738 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1739 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1740 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1741 1742 /* Find out where my gcols should go */ 1743 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1744 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1745 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1746 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1747 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1748 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1749 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1750 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1751 1752 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1753 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1756 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1757 for (i=0; i<m; i++) { 1758 PetscInt row = rdest[i],rowner; 1759 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1760 for (j=ai[i]; j<ai[i+1]; j++) { 1761 PetscInt cowner,col = cdest[aj[j]]; 1762 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1763 if (rowner == cowner) dnnz[i]++; 1764 else onnz[i]++; 1765 } 1766 for (j=bi[i]; j<bi[i+1]; j++) { 1767 PetscInt cowner,col = gcdest[bj[j]]; 1768 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1769 if (rowner == cowner) dnnz[i]++; 1770 else onnz[i]++; 1771 } 1772 } 1773 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1774 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1778 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1779 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1780 1781 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1782 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1784 for (i=0; i<m; i++) { 1785 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1786 PetscInt rowlen; 1787 rowlen = ai[i+1] - ai[i]; 1788 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1789 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1790 rowlen = bi[i+1] - bi[i]; 1791 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1792 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1793 } 1794 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1797 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1798 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1799 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1800 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1801 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1802 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1803 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1804 *B = Aperm; 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1811 { 1812 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1813 Mat A = mat->A,B = mat->B; 1814 PetscErrorCode ierr; 1815 PetscReal isend[5],irecv[5]; 1816 1817 PetscFunctionBegin; 1818 info->block_size = 1.0; 1819 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1820 1821 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1822 isend[3] = info->memory; isend[4] = info->mallocs; 1823 1824 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1825 1826 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1827 isend[3] += info->memory; isend[4] += info->mallocs; 1828 if (flag == MAT_LOCAL) { 1829 info->nz_used = isend[0]; 1830 info->nz_allocated = isend[1]; 1831 info->nz_unneeded = isend[2]; 1832 info->memory = isend[3]; 1833 info->mallocs = isend[4]; 1834 } else if (flag == MAT_GLOBAL_MAX) { 1835 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1836 1837 info->nz_used = irecv[0]; 1838 info->nz_allocated = irecv[1]; 1839 info->nz_unneeded = irecv[2]; 1840 info->memory = irecv[3]; 1841 info->mallocs = irecv[4]; 1842 } else if (flag == MAT_GLOBAL_SUM) { 1843 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1844 1845 info->nz_used = irecv[0]; 1846 info->nz_allocated = irecv[1]; 1847 info->nz_unneeded = irecv[2]; 1848 info->memory = irecv[3]; 1849 info->mallocs = irecv[4]; 1850 } 1851 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1852 info->fill_ratio_needed = 0; 1853 info->factor_mallocs = 0; 1854 PetscFunctionReturn(0); 1855 } 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatSetOption_MPIAIJ" 1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1860 { 1861 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 switch (op) { 1866 case MAT_NEW_NONZERO_LOCATIONS: 1867 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1868 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1869 case MAT_KEEP_NONZERO_PATTERN: 1870 case MAT_NEW_NONZERO_LOCATION_ERR: 1871 case MAT_USE_INODES: 1872 case MAT_IGNORE_ZERO_ENTRIES: 1873 MatCheckPreallocated(A,1); 1874 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1875 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1876 break; 1877 case MAT_ROW_ORIENTED: 1878 a->roworiented = flg; 1879 1880 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1881 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1882 break; 1883 case MAT_NEW_DIAGONALS: 1884 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1885 break; 1886 case MAT_IGNORE_OFF_PROC_ENTRIES: 1887 a->donotstash = flg; 1888 break; 1889 case MAT_SPD: 1890 A->spd_set = PETSC_TRUE; 1891 A->spd = flg; 1892 if (flg) { 1893 A->symmetric = PETSC_TRUE; 1894 A->structurally_symmetric = PETSC_TRUE; 1895 A->symmetric_set = PETSC_TRUE; 1896 A->structurally_symmetric_set = PETSC_TRUE; 1897 } 1898 break; 1899 case MAT_SYMMETRIC: 1900 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1901 break; 1902 case MAT_STRUCTURALLY_SYMMETRIC: 1903 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1904 break; 1905 case MAT_HERMITIAN: 1906 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1907 break; 1908 case MAT_SYMMETRY_ETERNAL: 1909 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1910 break; 1911 default: 1912 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatGetRow_MPIAIJ" 1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1920 { 1921 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1922 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1923 PetscErrorCode ierr; 1924 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1925 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1926 PetscInt *cmap,*idx_p; 1927 1928 PetscFunctionBegin; 1929 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1930 mat->getrowactive = PETSC_TRUE; 1931 1932 if (!mat->rowvalues && (idx || v)) { 1933 /* 1934 allocate enough space to hold information from the longest row. 1935 */ 1936 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1937 PetscInt max = 1,tmp; 1938 for (i=0; i<matin->rmap->n; i++) { 1939 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1940 if (max < tmp) max = tmp; 1941 } 1942 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1943 } 1944 1945 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1946 lrow = row - rstart; 1947 1948 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1949 if (!v) {pvA = 0; pvB = 0;} 1950 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1951 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1952 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1953 nztot = nzA + nzB; 1954 1955 cmap = mat->garray; 1956 if (v || idx) { 1957 if (nztot) { 1958 /* Sort by increasing column numbers, assuming A and B already sorted */ 1959 PetscInt imark = -1; 1960 if (v) { 1961 *v = v_p = mat->rowvalues; 1962 for (i=0; i<nzB; i++) { 1963 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1964 else break; 1965 } 1966 imark = i; 1967 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1968 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1969 } 1970 if (idx) { 1971 *idx = idx_p = mat->rowindices; 1972 if (imark > -1) { 1973 for (i=0; i<imark; i++) { 1974 idx_p[i] = cmap[cworkB[i]]; 1975 } 1976 } else { 1977 for (i=0; i<nzB; i++) { 1978 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1979 else break; 1980 } 1981 imark = i; 1982 } 1983 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1984 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1985 } 1986 } else { 1987 if (idx) *idx = 0; 1988 if (v) *v = 0; 1989 } 1990 } 1991 *nz = nztot; 1992 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1993 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2000 { 2001 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2002 2003 PetscFunctionBegin; 2004 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2005 aij->getrowactive = PETSC_FALSE; 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatNorm_MPIAIJ" 2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2012 { 2013 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2014 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2015 PetscErrorCode ierr; 2016 PetscInt i,j,cstart = mat->cmap->rstart; 2017 PetscReal sum = 0.0; 2018 MatScalar *v; 2019 2020 PetscFunctionBegin; 2021 if (aij->size == 1) { 2022 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2023 } else { 2024 if (type == NORM_FROBENIUS) { 2025 v = amat->a; 2026 for (i=0; i<amat->nz; i++) { 2027 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2028 } 2029 v = bmat->a; 2030 for (i=0; i<bmat->nz; i++) { 2031 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2032 } 2033 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2034 *norm = PetscSqrtReal(*norm); 2035 } else if (type == NORM_1) { /* max column norm */ 2036 PetscReal *tmp,*tmp2; 2037 PetscInt *jj,*garray = aij->garray; 2038 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2039 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2040 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2041 *norm = 0.0; 2042 v = amat->a; jj = amat->j; 2043 for (j=0; j<amat->nz; j++) { 2044 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2045 } 2046 v = bmat->a; jj = bmat->j; 2047 for (j=0; j<bmat->nz; j++) { 2048 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2049 } 2050 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2051 for (j=0; j<mat->cmap->N; j++) { 2052 if (tmp2[j] > *norm) *norm = tmp2[j]; 2053 } 2054 ierr = PetscFree(tmp);CHKERRQ(ierr); 2055 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2056 } else if (type == NORM_INFINITY) { /* max row norm */ 2057 PetscReal ntemp = 0.0; 2058 for (j=0; j<aij->A->rmap->n; j++) { 2059 v = amat->a + amat->i[j]; 2060 sum = 0.0; 2061 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2062 sum += PetscAbsScalar(*v); v++; 2063 } 2064 v = bmat->a + bmat->i[j]; 2065 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2066 sum += PetscAbsScalar(*v); v++; 2067 } 2068 if (sum > ntemp) ntemp = sum; 2069 } 2070 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2071 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2072 } 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatTranspose_MPIAIJ" 2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2079 { 2080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2081 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2082 PetscErrorCode ierr; 2083 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2084 PetscInt cstart = A->cmap->rstart,ncol; 2085 Mat B; 2086 MatScalar *array; 2087 2088 PetscFunctionBegin; 2089 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2090 2091 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2092 ai = Aloc->i; aj = Aloc->j; 2093 bi = Bloc->i; bj = Bloc->j; 2094 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2095 PetscInt *d_nnz,*g_nnz,*o_nnz; 2096 PetscSFNode *oloc; 2097 PETSC_UNUSED PetscSF sf; 2098 2099 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2100 /* compute d_nnz for preallocation */ 2101 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2102 for (i=0; i<ai[ma]; i++) { 2103 d_nnz[aj[i]]++; 2104 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2105 } 2106 /* compute local off-diagonal contributions */ 2107 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2108 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2109 /* map those to global */ 2110 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2111 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2112 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2113 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2114 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2115 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2116 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2117 2118 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2119 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2120 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2121 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2122 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2123 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2124 } else { 2125 B = *matout; 2126 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2127 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2128 } 2129 2130 /* copy over the A part */ 2131 array = Aloc->a; 2132 row = A->rmap->rstart; 2133 for (i=0; i<ma; i++) { 2134 ncol = ai[i+1]-ai[i]; 2135 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2136 row++; 2137 array += ncol; aj += ncol; 2138 } 2139 aj = Aloc->j; 2140 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2141 2142 /* copy over the B part */ 2143 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2144 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2145 array = Bloc->a; 2146 row = A->rmap->rstart; 2147 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2148 cols_tmp = cols; 2149 for (i=0; i<mb; i++) { 2150 ncol = bi[i+1]-bi[i]; 2151 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2152 row++; 2153 array += ncol; cols_tmp += ncol; 2154 } 2155 ierr = PetscFree(cols);CHKERRQ(ierr); 2156 2157 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2160 *matout = B; 2161 } else { 2162 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2170 { 2171 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2172 Mat a = aij->A,b = aij->B; 2173 PetscErrorCode ierr; 2174 PetscInt s1,s2,s3; 2175 2176 PetscFunctionBegin; 2177 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2178 if (rr) { 2179 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2180 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2181 /* Overlap communication with computation. */ 2182 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2183 } 2184 if (ll) { 2185 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2186 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2187 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2188 } 2189 /* scale the diagonal block */ 2190 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2191 2192 if (rr) { 2193 /* Do a scatter end and then right scale the off-diagonal block */ 2194 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2195 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2196 } 2197 PetscFunctionReturn(0); 2198 } 2199 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2203 { 2204 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "MatEqual_MPIAIJ" 2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2215 { 2216 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2217 Mat a,b,c,d; 2218 PetscBool flg; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 a = matA->A; b = matA->B; 2223 c = matB->A; d = matB->B; 2224 2225 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2226 if (flg) { 2227 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2228 } 2229 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2230 PetscFunctionReturn(0); 2231 } 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "MatCopy_MPIAIJ" 2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2236 { 2237 PetscErrorCode ierr; 2238 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2239 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2240 2241 PetscFunctionBegin; 2242 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2243 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2244 /* because of the column compression in the off-processor part of the matrix a->B, 2245 the number of columns in a->B and b->B may be different, hence we cannot call 2246 the MatCopy() directly on the two parts. If need be, we can provide a more 2247 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2248 then copying the submatrices */ 2249 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2250 } else { 2251 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2252 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2253 } 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatSetUp_MPIAIJ" 2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2260 { 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2272 { 2273 PetscInt i,m=Y->rmap->N; 2274 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2275 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2276 const PetscInt *xi = x->i,*yi = y->i; 2277 2278 PetscFunctionBegin; 2279 /* Set the number of nonzeros in the new matrix */ 2280 for (i=0; i<m; i++) { 2281 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2282 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2283 nnz[i] = 0; 2284 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2285 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2286 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2287 nnz[i]++; 2288 } 2289 for (; k<nzy; k++) nnz[i]++; 2290 } 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatAXPY_MPIAIJ" 2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2297 { 2298 PetscErrorCode ierr; 2299 PetscInt i; 2300 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2301 PetscBLASInt bnz,one=1; 2302 Mat_SeqAIJ *x,*y; 2303 2304 PetscFunctionBegin; 2305 if (str == SAME_NONZERO_PATTERN) { 2306 PetscScalar alpha = a; 2307 x = (Mat_SeqAIJ*)xx->A->data; 2308 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2309 y = (Mat_SeqAIJ*)yy->A->data; 2310 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2311 x = (Mat_SeqAIJ*)xx->B->data; 2312 y = (Mat_SeqAIJ*)yy->B->data; 2313 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2314 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2315 } else if (str == SUBSET_NONZERO_PATTERN) { 2316 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2317 2318 x = (Mat_SeqAIJ*)xx->B->data; 2319 y = (Mat_SeqAIJ*)yy->B->data; 2320 if (y->xtoy && y->XtoY != xx->B) { 2321 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2322 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2323 } 2324 if (!y->xtoy) { /* get xtoy */ 2325 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2326 y->XtoY = xx->B; 2327 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2328 } 2329 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2330 } else { 2331 Mat B; 2332 PetscInt *nnz_d,*nnz_o; 2333 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2334 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2335 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2336 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2337 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2338 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2339 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2340 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2341 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2342 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2343 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2344 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2345 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2347 } 2348 PetscFunctionReturn(0); 2349 } 2350 2351 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2352 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatConjugate_MPIAIJ" 2355 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2356 { 2357 #if defined(PETSC_USE_COMPLEX) 2358 PetscErrorCode ierr; 2359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2360 2361 PetscFunctionBegin; 2362 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2363 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2364 #else 2365 PetscFunctionBegin; 2366 #endif 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatRealPart_MPIAIJ" 2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2373 { 2374 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2375 PetscErrorCode ierr; 2376 2377 PetscFunctionBegin; 2378 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2379 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2386 { 2387 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2392 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #if defined(PETSC_HAVE_PBGL) 2397 2398 #include <boost/parallel/mpi/bsp_process_group.hpp> 2399 #include <boost/graph/distributed/ilu_default_graph.hpp> 2400 #include <boost/graph/distributed/ilu_0_block.hpp> 2401 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2402 #include <boost/graph/distributed/petsc/interface.hpp> 2403 #include <boost/multi_array.hpp> 2404 #include <boost/parallel/distributed_property_map->hpp> 2405 2406 #undef __FUNCT__ 2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2408 /* 2409 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2410 */ 2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2412 { 2413 namespace petsc = boost::distributed::petsc; 2414 2415 namespace graph_dist = boost::graph::distributed; 2416 using boost::graph::distributed::ilu_default::process_group_type; 2417 using boost::graph::ilu_permuted; 2418 2419 PetscBool row_identity, col_identity; 2420 PetscContainer c; 2421 PetscInt m, n, M, N; 2422 PetscErrorCode ierr; 2423 2424 PetscFunctionBegin; 2425 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2426 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2427 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2428 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2429 2430 process_group_type pg; 2431 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2432 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2433 lgraph_type& level_graph = *lgraph_p; 2434 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2435 2436 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2437 ilu_permuted(level_graph); 2438 2439 /* put together the new matrix */ 2440 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2441 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2442 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2443 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2444 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2445 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2446 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2447 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 2449 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2450 ierr = PetscContainerSetPointer(c, lgraph_p); 2451 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2452 ierr = PetscContainerDestroy(&c); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 #undef __FUNCT__ 2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2459 { 2460 PetscFunctionBegin; 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatSolve_MPIAIJ" 2466 /* 2467 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2468 */ 2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2470 { 2471 namespace graph_dist = boost::graph::distributed; 2472 2473 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2474 lgraph_type *lgraph_p; 2475 PetscContainer c; 2476 PetscErrorCode ierr; 2477 2478 PetscFunctionBegin; 2479 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2480 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2481 ierr = VecCopy(b, x);CHKERRQ(ierr); 2482 2483 PetscScalar *array_x; 2484 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2485 PetscInt sx; 2486 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2487 2488 PetscScalar *array_b; 2489 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2490 PetscInt sb; 2491 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2492 2493 lgraph_type& level_graph = *lgraph_p; 2494 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2495 2496 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2497 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2498 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2499 2500 typedef boost::iterator_property_map<array_ref_type::iterator, 2501 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2502 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2503 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2504 2505 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2506 PetscFunctionReturn(0); 2507 } 2508 #endif 2509 2510 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2511 PetscInt nzlocal,nsends,nrecvs; 2512 PetscMPIInt *send_rank,*recv_rank; 2513 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2514 PetscScalar *sbuf_a,**rbuf_a; 2515 PetscErrorCode (*Destroy)(Mat); 2516 } Mat_Redundant; 2517 2518 #undef __FUNCT__ 2519 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2520 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2521 { 2522 PetscErrorCode ierr; 2523 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2524 PetscInt i; 2525 2526 PetscFunctionBegin; 2527 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2528 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2529 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2530 for (i=0; i<redund->nrecvs; i++) { 2531 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2532 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2533 } 2534 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2535 ierr = PetscFree(redund);CHKERRQ(ierr); 2536 PetscFunctionReturn(0); 2537 } 2538 2539 #undef __FUNCT__ 2540 #define __FUNCT__ "MatDestroy_MatRedundant" 2541 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2542 { 2543 PetscErrorCode ierr; 2544 PetscContainer container; 2545 Mat_Redundant *redund=NULL; 2546 2547 PetscFunctionBegin; 2548 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2549 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2550 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2551 2552 A->ops->destroy = redund->Destroy; 2553 2554 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2555 if (A->ops->destroy) { 2556 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2557 } 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2563 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant) 2564 { 2565 PetscMPIInt rank,size; 2566 MPI_Comm comm,subcomm=psubcomm->comm; 2567 PetscErrorCode ierr; 2568 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,nsubcomm=psubcomm->n; 2569 PetscMPIInt *send_rank= NULL,*recv_rank=NULL; 2570 PetscInt *rowrange = mat->rmap->range; 2571 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2572 Mat A = aij->A,B=aij->B,C=*matredundant; 2573 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2574 PetscScalar *sbuf_a; 2575 PetscInt nzlocal=a->nz+b->nz; 2576 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2577 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N,m,n; 2578 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2579 MatScalar *aworkA,*aworkB; 2580 PetscScalar *vals; 2581 PetscMPIInt tag1,tag2,tag3,imdex; 2582 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2583 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2584 MPI_Status recv_status,*send_status; 2585 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2586 PetscInt **rbuf_j=NULL; 2587 PetscScalar **rbuf_a=NULL; 2588 Mat_Redundant *redund =NULL; 2589 PetscContainer container; 2590 PetscInt rstart_sub,rend_sub,mloc_sub; 2591 2592 PetscFunctionBegin; 2593 if (psubcomm->type != PETSC_SUBCOMM_INTERLACED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMETYPE,"Input psubcomm must be PETSC_SUBCOMM_INTERLACED type"); 2594 ierr = MatGetSize(mat,&M,&N);CHKERRQ(ierr); 2595 if (M != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-square matrix yet, rows %D columns %D",M,N); 2596 2597 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2598 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2599 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2600 2601 if (reuse == MAT_REUSE_MATRIX) { 2602 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2603 ierr = MatGetLocalSize(C,&m,&n);CHKERRQ(ierr); 2604 if (m != mat->rmap->n || n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2605 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2606 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2607 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2608 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2609 2610 nsends = redund->nsends; 2611 nrecvs = redund->nrecvs; 2612 send_rank = redund->send_rank; 2613 recv_rank = redund->recv_rank; 2614 sbuf_nz = redund->sbuf_nz; 2615 rbuf_nz = redund->rbuf_nz; 2616 sbuf_j = redund->sbuf_j; 2617 sbuf_a = redund->sbuf_a; 2618 rbuf_j = redund->rbuf_j; 2619 rbuf_a = redund->rbuf_a; 2620 } 2621 2622 if (reuse == MAT_INITIAL_MATRIX) { 2623 PetscMPIInt subrank,subsize; 2624 PetscInt nleftover,np_subcomm; 2625 2626 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2627 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2628 2629 /* get local size of redundant matrix */ 2630 const PetscInt *range; 2631 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2632 rstart_sub = range[nsubcomm*subrank]; 2633 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2634 rend_sub = range[nsubcomm*(subrank+1)]; 2635 } else { 2636 rend_sub = mat->rmap->N; 2637 } 2638 2639 mloc_sub = rend_sub - rstart_sub; 2640 /* printf("[%d] rstart_sub %d %d, mloc_sub %d\n",rank,rstart_sub,rend_sub, mloc_sub); */ 2641 2642 /* get the destination processors' id send_rank, nsends and nrecvs */ 2643 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2644 2645 np_subcomm = size/nsubcomm; 2646 nleftover = size - nsubcomm*np_subcomm; 2647 2648 nsends = 0; nrecvs = 0; 2649 for (i=0; i<size; i++) { 2650 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2651 send_rank[nsends] = i; nsends++; 2652 recv_rank[nrecvs++] = i; 2653 } 2654 } 2655 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2656 i = size-nleftover-1; 2657 j = 0; 2658 while (j < nsubcomm - nleftover) { 2659 send_rank[nsends++] = i; 2660 i--; j++; 2661 } 2662 } 2663 2664 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2665 for (i=0; i<nleftover; i++) { 2666 recv_rank[nrecvs++] = size-nleftover+i; 2667 } 2668 } 2669 2670 /* allocate sbuf_j, sbuf_a */ 2671 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2672 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2673 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2674 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2675 2676 /* copy mat's local entries into the buffers */ 2677 if (reuse == MAT_INITIAL_MATRIX) { 2678 rownz_max = 0; 2679 rptr = sbuf_j; 2680 cols = sbuf_j + rend-rstart + 1; 2681 vals = sbuf_a; 2682 rptr[0] = 0; 2683 for (i=0; i<rend-rstart; i++) { 2684 row = i + rstart; 2685 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2686 ncols = nzA + nzB; 2687 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2688 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2689 /* load the column indices for this row into cols */ 2690 lwrite = 0; 2691 for (l=0; l<nzB; l++) { 2692 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2693 vals[lwrite] = aworkB[l]; 2694 cols[lwrite++] = ctmp; 2695 } 2696 } 2697 for (l=0; l<nzA; l++) { 2698 vals[lwrite] = aworkA[l]; 2699 cols[lwrite++] = cstart + cworkA[l]; 2700 } 2701 for (l=0; l<nzB; l++) { 2702 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2703 vals[lwrite] = aworkB[l]; 2704 cols[lwrite++] = ctmp; 2705 } 2706 } 2707 vals += ncols; 2708 cols += ncols; 2709 rptr[i+1] = rptr[i] + ncols; 2710 if (rownz_max < ncols) rownz_max = ncols; 2711 } 2712 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2713 } else { /* only copy matrix values into sbuf_a */ 2714 rptr = sbuf_j; 2715 vals = sbuf_a; 2716 rptr[0] = 0; 2717 for (i=0; i<rend-rstart; i++) { 2718 row = i + rstart; 2719 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2720 ncols = nzA + nzB; 2721 cworkB = b->j + b->i[i]; 2722 aworkA = a->a + a->i[i]; 2723 aworkB = b->a + b->i[i]; 2724 lwrite = 0; 2725 for (l=0; l<nzB; l++) { 2726 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2727 } 2728 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2729 for (l=0; l<nzB; l++) { 2730 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2731 } 2732 vals += ncols; 2733 rptr[i+1] = rptr[i] + ncols; 2734 } 2735 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2736 2737 /* send nzlocal to others, and recv other's nzlocal */ 2738 /*--------------------------------------------------*/ 2739 if (reuse == MAT_INITIAL_MATRIX) { 2740 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2741 2742 s_waits2 = s_waits3 + nsends; 2743 s_waits1 = s_waits2 + nsends; 2744 r_waits1 = s_waits1 + nsends; 2745 r_waits2 = r_waits1 + nrecvs; 2746 r_waits3 = r_waits2 + nrecvs; 2747 } else { 2748 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2749 2750 r_waits3 = s_waits3 + nsends; 2751 } 2752 2753 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2754 if (reuse == MAT_INITIAL_MATRIX) { 2755 /* get new tags to keep the communication clean */ 2756 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2757 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2758 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2759 2760 /* post receives of other's nzlocal */ 2761 for (i=0; i<nrecvs; i++) { 2762 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2763 } 2764 /* send nzlocal to others */ 2765 for (i=0; i<nsends; i++) { 2766 sbuf_nz[i] = nzlocal; 2767 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2768 } 2769 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2770 count = nrecvs; 2771 while (count) { 2772 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2773 2774 recv_rank[imdex] = recv_status.MPI_SOURCE; 2775 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2776 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2777 2778 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2779 2780 rbuf_nz[imdex] += i + 2; 2781 2782 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2783 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2784 count--; 2785 } 2786 /* wait on sends of nzlocal */ 2787 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2788 /* send mat->i,j to others, and recv from other's */ 2789 /*------------------------------------------------*/ 2790 for (i=0; i<nsends; i++) { 2791 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2792 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2793 } 2794 /* wait on receives of mat->i,j */ 2795 /*------------------------------*/ 2796 count = nrecvs; 2797 while (count) { 2798 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2799 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2800 count--; 2801 } 2802 /* wait on sends of mat->i,j */ 2803 /*---------------------------*/ 2804 if (nsends) { 2805 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2806 } 2807 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2808 2809 /* post receives, send and receive mat->a */ 2810 /*----------------------------------------*/ 2811 for (imdex=0; imdex<nrecvs; imdex++) { 2812 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2813 } 2814 for (i=0; i<nsends; i++) { 2815 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2816 } 2817 count = nrecvs; 2818 while (count) { 2819 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2820 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2821 count--; 2822 } 2823 if (nsends) { 2824 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2825 } 2826 2827 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2828 2829 /* create redundant matrix */ 2830 /*-------------------------*/ 2831 if (reuse == MAT_INITIAL_MATRIX) { 2832 /* compute rownz_max for preallocation */ 2833 for (imdex=0; imdex<nrecvs; imdex++) { 2834 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2835 rptr = rbuf_j[imdex]; 2836 for (i=0; i<j; i++) { 2837 ncols = rptr[i+1] - rptr[i]; 2838 if (rownz_max < ncols) rownz_max = ncols; 2839 } 2840 } 2841 2842 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2843 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2844 /* ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DETERMINE,mat->cmap->N);CHKERRQ(ierr); -- give incorrect local column! */ 2845 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2846 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2847 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2848 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2849 } else { 2850 C = *matredundant; 2851 } 2852 2853 /* insert local matrix entries */ 2854 rptr = sbuf_j; 2855 cols = sbuf_j + rend-rstart + 1; 2856 vals = sbuf_a; 2857 for (i=0; i<rend-rstart; i++) { 2858 row = i + rstart; 2859 ncols = rptr[i+1] - rptr[i]; 2860 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2861 vals += ncols; 2862 cols += ncols; 2863 } 2864 /* insert received matrix entries */ 2865 for (imdex=0; imdex<nrecvs; imdex++) { 2866 rstart = rowrange[recv_rank[imdex]]; 2867 rend = rowrange[recv_rank[imdex]+1]; 2868 rptr = rbuf_j[imdex]; 2869 cols = rbuf_j[imdex] + rend-rstart + 1; 2870 vals = rbuf_a[imdex]; 2871 for (i=0; i<rend-rstart; i++) { 2872 row = i + rstart; 2873 ncols = rptr[i+1] - rptr[i]; 2874 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2875 vals += ncols; 2876 cols += ncols; 2877 } 2878 } 2879 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2880 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2881 2882 if (reuse == MAT_INITIAL_MATRIX) { 2883 PetscContainer container; 2884 *matredundant = C; 2885 /* create a supporting struct and attach it to C for reuse */ 2886 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2887 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2888 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2889 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2890 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2891 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2892 2893 redund->nzlocal = nzlocal; 2894 redund->nsends = nsends; 2895 redund->nrecvs = nrecvs; 2896 redund->send_rank = send_rank; 2897 redund->recv_rank = recv_rank; 2898 redund->sbuf_nz = sbuf_nz; 2899 redund->rbuf_nz = rbuf_nz; 2900 redund->sbuf_j = sbuf_j; 2901 redund->sbuf_a = sbuf_a; 2902 redund->rbuf_j = rbuf_j; 2903 redund->rbuf_a = rbuf_a; 2904 2905 redund->Destroy = C->ops->destroy; 2906 C->ops->destroy = MatDestroy_MatRedundant; 2907 } 2908 PetscFunctionReturn(0); 2909 } 2910 2911 #undef __FUNCT__ 2912 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2913 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2914 { 2915 PetscErrorCode ierr; 2916 PetscMPIInt rank,size; 2917 MPI_Comm comm; 2918 PetscInt nsends = 0,nrecvs=0,i,rownz_max=0; 2919 PetscMPIInt *send_rank= NULL,*recv_rank=NULL; 2920 PetscInt *rowrange = mat->rmap->range; 2921 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2922 Mat A = aij->A,B=aij->B,C=*matredundant; 2923 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2924 PetscScalar *sbuf_a; 2925 PetscInt nzlocal=a->nz+b->nz; 2926 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2927 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2928 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2929 MatScalar *aworkA,*aworkB; 2930 PetscScalar *vals; 2931 PetscMPIInt tag1,tag2,tag3,imdex; 2932 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2933 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2934 MPI_Status recv_status,*send_status; 2935 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2936 PetscInt **rbuf_j=NULL; 2937 PetscScalar **rbuf_a=NULL; 2938 Mat_Redundant *redund =NULL; 2939 PetscContainer container; 2940 2941 PetscFunctionBegin; 2942 /* Only MatGetRedundantMatrix_MPIAIJ_interlaced() is written now */ 2943 /* 2944 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,psubcomm,reuse,matredundant);CHKERRQ(ierr); 2945 PetscFunctionReturn(0); 2946 */ 2947 2948 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2949 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2950 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2951 2952 if (reuse == MAT_REUSE_MATRIX) { 2953 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2954 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2955 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2956 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2957 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2958 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2959 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2960 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2961 2962 nsends = redund->nsends; 2963 nrecvs = redund->nrecvs; 2964 send_rank = redund->send_rank; 2965 recv_rank = redund->recv_rank; 2966 sbuf_nz = redund->sbuf_nz; 2967 rbuf_nz = redund->rbuf_nz; 2968 sbuf_j = redund->sbuf_j; 2969 sbuf_a = redund->sbuf_a; 2970 rbuf_j = redund->rbuf_j; 2971 rbuf_a = redund->rbuf_a; 2972 } 2973 2974 if (reuse == MAT_INITIAL_MATRIX) { 2975 PetscMPIInt subrank,subsize; 2976 PetscInt nleftover,np_subcomm; 2977 /* get the destination processors' id send_rank, nsends and nrecvs */ 2978 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2979 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2980 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2981 2982 np_subcomm = size/nsubcomm; 2983 nleftover = size - nsubcomm*np_subcomm; 2984 2985 nsends = 0; nrecvs = 0; 2986 for (i=0; i<size; i++) { /* i=rank*/ 2987 if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */ 2988 send_rank[nsends] = i; nsends++; 2989 recv_rank[nrecvs++] = i; 2990 } 2991 } 2992 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2993 i = size-nleftover-1; 2994 j = 0; 2995 while (j < nsubcomm - nleftover) { 2996 send_rank[nsends++] = i; 2997 i--; j++; 2998 } 2999 } 3000 3001 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 3002 for (i=0; i<nleftover; i++) { 3003 recv_rank[nrecvs++] = size-nleftover+i; 3004 } 3005 } 3006 3007 /* allocate sbuf_j, sbuf_a */ 3008 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 3009 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 3010 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 3011 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 3012 3013 /* copy mat's local entries into the buffers */ 3014 if (reuse == MAT_INITIAL_MATRIX) { 3015 rownz_max = 0; 3016 rptr = sbuf_j; 3017 cols = sbuf_j + rend-rstart + 1; 3018 vals = sbuf_a; 3019 rptr[0] = 0; 3020 for (i=0; i<rend-rstart; i++) { 3021 row = i + rstart; 3022 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 3023 ncols = nzA + nzB; 3024 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 3025 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 3026 /* load the column indices for this row into cols */ 3027 lwrite = 0; 3028 for (l=0; l<nzB; l++) { 3029 if ((ctmp = bmap[cworkB[l]]) < cstart) { 3030 vals[lwrite] = aworkB[l]; 3031 cols[lwrite++] = ctmp; 3032 } 3033 } 3034 for (l=0; l<nzA; l++) { 3035 vals[lwrite] = aworkA[l]; 3036 cols[lwrite++] = cstart + cworkA[l]; 3037 } 3038 for (l=0; l<nzB; l++) { 3039 if ((ctmp = bmap[cworkB[l]]) >= cend) { 3040 vals[lwrite] = aworkB[l]; 3041 cols[lwrite++] = ctmp; 3042 } 3043 } 3044 vals += ncols; 3045 cols += ncols; 3046 rptr[i+1] = rptr[i] + ncols; 3047 if (rownz_max < ncols) rownz_max = ncols; 3048 } 3049 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 3050 } else { /* only copy matrix values into sbuf_a */ 3051 rptr = sbuf_j; 3052 vals = sbuf_a; 3053 rptr[0] = 0; 3054 for (i=0; i<rend-rstart; i++) { 3055 row = i + rstart; 3056 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 3057 ncols = nzA + nzB; 3058 cworkB = b->j + b->i[i]; 3059 aworkA = a->a + a->i[i]; 3060 aworkB = b->a + b->i[i]; 3061 lwrite = 0; 3062 for (l=0; l<nzB; l++) { 3063 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 3064 } 3065 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 3066 for (l=0; l<nzB; l++) { 3067 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 3068 } 3069 vals += ncols; 3070 rptr[i+1] = rptr[i] + ncols; 3071 } 3072 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 3073 3074 /* send nzlocal to others, and recv other's nzlocal */ 3075 /*--------------------------------------------------*/ 3076 if (reuse == MAT_INITIAL_MATRIX) { 3077 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 3078 3079 s_waits2 = s_waits3 + nsends; 3080 s_waits1 = s_waits2 + nsends; 3081 r_waits1 = s_waits1 + nsends; 3082 r_waits2 = r_waits1 + nrecvs; 3083 r_waits3 = r_waits2 + nrecvs; 3084 } else { 3085 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 3086 3087 r_waits3 = s_waits3 + nsends; 3088 } 3089 3090 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 3091 if (reuse == MAT_INITIAL_MATRIX) { 3092 /* get new tags to keep the communication clean */ 3093 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 3094 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 3095 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 3096 3097 /* post receives of other's nzlocal */ 3098 for (i=0; i<nrecvs; i++) { 3099 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 3100 } 3101 /* send nzlocal to others */ 3102 for (i=0; i<nsends; i++) { 3103 sbuf_nz[i] = nzlocal; 3104 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 3105 } 3106 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 3107 count = nrecvs; 3108 while (count) { 3109 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 3110 3111 recv_rank[imdex] = recv_status.MPI_SOURCE; 3112 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 3113 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 3114 3115 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 3116 3117 rbuf_nz[imdex] += i + 2; 3118 3119 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 3120 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 3121 count--; 3122 } 3123 /* wait on sends of nzlocal */ 3124 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 3125 /* send mat->i,j to others, and recv from other's */ 3126 /*------------------------------------------------*/ 3127 for (i=0; i<nsends; i++) { 3128 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 3129 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 3130 } 3131 /* wait on receives of mat->i,j */ 3132 /*------------------------------*/ 3133 count = nrecvs; 3134 while (count) { 3135 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 3136 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 3137 count--; 3138 } 3139 /* wait on sends of mat->i,j */ 3140 /*---------------------------*/ 3141 if (nsends) { 3142 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 3143 } 3144 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 3145 3146 /* post receives, send and receive mat->a */ 3147 /*----------------------------------------*/ 3148 for (imdex=0; imdex<nrecvs; imdex++) { 3149 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 3150 } 3151 for (i=0; i<nsends; i++) { 3152 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 3153 } 3154 count = nrecvs; 3155 while (count) { 3156 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 3157 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 3158 count--; 3159 } 3160 if (nsends) { 3161 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 3162 } 3163 3164 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 3165 3166 /* create redundant matrix */ 3167 /*-------------------------*/ 3168 if (reuse == MAT_INITIAL_MATRIX) { 3169 /* compute rownz_max for preallocation */ 3170 for (imdex=0; imdex<nrecvs; imdex++) { 3171 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 3172 rptr = rbuf_j[imdex]; 3173 for (i=0; i<j; i++) { 3174 ncols = rptr[i+1] - rptr[i]; 3175 if (rownz_max < ncols) rownz_max = ncols; 3176 } 3177 } 3178 3179 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 3180 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 3181 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 3182 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 3183 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 3184 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 3185 } else { 3186 C = *matredundant; 3187 } 3188 3189 /* insert local matrix entries */ 3190 rptr = sbuf_j; 3191 cols = sbuf_j + rend-rstart + 1; 3192 vals = sbuf_a; 3193 for (i=0; i<rend-rstart; i++) { 3194 row = i + rstart; 3195 ncols = rptr[i+1] - rptr[i]; 3196 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3197 vals += ncols; 3198 cols += ncols; 3199 } 3200 /* insert received matrix entries */ 3201 for (imdex=0; imdex<nrecvs; imdex++) { 3202 rstart = rowrange[recv_rank[imdex]]; 3203 rend = rowrange[recv_rank[imdex]+1]; 3204 rptr = rbuf_j[imdex]; 3205 cols = rbuf_j[imdex] + rend-rstart + 1; 3206 vals = rbuf_a[imdex]; 3207 for (i=0; i<rend-rstart; i++) { 3208 row = i + rstart; 3209 ncols = rptr[i+1] - rptr[i]; 3210 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3211 vals += ncols; 3212 cols += ncols; 3213 } 3214 } 3215 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3216 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3217 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 3218 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 3219 if (reuse == MAT_INITIAL_MATRIX) { 3220 PetscContainer container; 3221 *matredundant = C; 3222 /* create a supporting struct and attach it to C for reuse */ 3223 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 3224 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3225 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 3226 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 3227 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 3228 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 3229 3230 redund->nzlocal = nzlocal; 3231 redund->nsends = nsends; 3232 redund->nrecvs = nrecvs; 3233 redund->send_rank = send_rank; 3234 redund->recv_rank = recv_rank; 3235 redund->sbuf_nz = sbuf_nz; 3236 redund->rbuf_nz = rbuf_nz; 3237 redund->sbuf_j = sbuf_j; 3238 redund->sbuf_a = sbuf_a; 3239 redund->rbuf_j = rbuf_j; 3240 redund->rbuf_a = rbuf_a; 3241 3242 redund->Destroy = C->ops->destroy; 3243 C->ops->destroy = MatDestroy_MatRedundant; 3244 } 3245 PetscFunctionReturn(0); 3246 } 3247 3248 #undef __FUNCT__ 3249 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 3250 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3251 { 3252 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3253 PetscErrorCode ierr; 3254 PetscInt i,*idxb = 0; 3255 PetscScalar *va,*vb; 3256 Vec vtmp; 3257 3258 PetscFunctionBegin; 3259 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 3260 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3261 if (idx) { 3262 for (i=0; i<A->rmap->n; i++) { 3263 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 3264 } 3265 } 3266 3267 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 3268 if (idx) { 3269 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 3270 } 3271 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 3272 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 3273 3274 for (i=0; i<A->rmap->n; i++) { 3275 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 3276 va[i] = vb[i]; 3277 if (idx) idx[i] = a->garray[idxb[i]]; 3278 } 3279 } 3280 3281 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3282 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3283 ierr = PetscFree(idxb);CHKERRQ(ierr); 3284 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3285 PetscFunctionReturn(0); 3286 } 3287 3288 #undef __FUNCT__ 3289 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 3290 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3291 { 3292 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3293 PetscErrorCode ierr; 3294 PetscInt i,*idxb = 0; 3295 PetscScalar *va,*vb; 3296 Vec vtmp; 3297 3298 PetscFunctionBegin; 3299 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 3300 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3301 if (idx) { 3302 for (i=0; i<A->cmap->n; i++) { 3303 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 3304 } 3305 } 3306 3307 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 3308 if (idx) { 3309 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 3310 } 3311 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 3312 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 3313 3314 for (i=0; i<A->rmap->n; i++) { 3315 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 3316 va[i] = vb[i]; 3317 if (idx) idx[i] = a->garray[idxb[i]]; 3318 } 3319 } 3320 3321 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3322 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3323 ierr = PetscFree(idxb);CHKERRQ(ierr); 3324 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3325 PetscFunctionReturn(0); 3326 } 3327 3328 #undef __FUNCT__ 3329 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 3330 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3331 { 3332 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3333 PetscInt n = A->rmap->n; 3334 PetscInt cstart = A->cmap->rstart; 3335 PetscInt *cmap = mat->garray; 3336 PetscInt *diagIdx, *offdiagIdx; 3337 Vec diagV, offdiagV; 3338 PetscScalar *a, *diagA, *offdiagA; 3339 PetscInt r; 3340 PetscErrorCode ierr; 3341 3342 PetscFunctionBegin; 3343 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3344 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 3345 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 3346 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3347 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3348 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3349 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3350 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3351 for (r = 0; r < n; ++r) { 3352 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3353 a[r] = diagA[r]; 3354 idx[r] = cstart + diagIdx[r]; 3355 } else { 3356 a[r] = offdiagA[r]; 3357 idx[r] = cmap[offdiagIdx[r]]; 3358 } 3359 } 3360 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3361 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3362 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3363 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3364 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3365 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3366 PetscFunctionReturn(0); 3367 } 3368 3369 #undef __FUNCT__ 3370 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3371 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3372 { 3373 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3374 PetscInt n = A->rmap->n; 3375 PetscInt cstart = A->cmap->rstart; 3376 PetscInt *cmap = mat->garray; 3377 PetscInt *diagIdx, *offdiagIdx; 3378 Vec diagV, offdiagV; 3379 PetscScalar *a, *diagA, *offdiagA; 3380 PetscInt r; 3381 PetscErrorCode ierr; 3382 3383 PetscFunctionBegin; 3384 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3385 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3386 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3387 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3388 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3389 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3390 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3391 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3392 for (r = 0; r < n; ++r) { 3393 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3394 a[r] = diagA[r]; 3395 idx[r] = cstart + diagIdx[r]; 3396 } else { 3397 a[r] = offdiagA[r]; 3398 idx[r] = cmap[offdiagIdx[r]]; 3399 } 3400 } 3401 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3402 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3403 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3404 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3405 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3406 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3407 PetscFunctionReturn(0); 3408 } 3409 3410 #undef __FUNCT__ 3411 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3412 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3413 { 3414 PetscErrorCode ierr; 3415 Mat *dummy; 3416 3417 PetscFunctionBegin; 3418 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3419 *newmat = *dummy; 3420 ierr = PetscFree(dummy);CHKERRQ(ierr); 3421 PetscFunctionReturn(0); 3422 } 3423 3424 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3425 3426 #undef __FUNCT__ 3427 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3428 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3429 { 3430 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3431 PetscErrorCode ierr; 3432 3433 PetscFunctionBegin; 3434 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3435 PetscFunctionReturn(0); 3436 } 3437 3438 #undef __FUNCT__ 3439 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3440 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3441 { 3442 PetscErrorCode ierr; 3443 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3444 3445 PetscFunctionBegin; 3446 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3447 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3448 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3449 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3450 PetscFunctionReturn(0); 3451 } 3452 3453 /* -------------------------------------------------------------------*/ 3454 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3455 MatGetRow_MPIAIJ, 3456 MatRestoreRow_MPIAIJ, 3457 MatMult_MPIAIJ, 3458 /* 4*/ MatMultAdd_MPIAIJ, 3459 MatMultTranspose_MPIAIJ, 3460 MatMultTransposeAdd_MPIAIJ, 3461 #if defined(PETSC_HAVE_PBGL) 3462 MatSolve_MPIAIJ, 3463 #else 3464 0, 3465 #endif 3466 0, 3467 0, 3468 /*10*/ 0, 3469 0, 3470 0, 3471 MatSOR_MPIAIJ, 3472 MatTranspose_MPIAIJ, 3473 /*15*/ MatGetInfo_MPIAIJ, 3474 MatEqual_MPIAIJ, 3475 MatGetDiagonal_MPIAIJ, 3476 MatDiagonalScale_MPIAIJ, 3477 MatNorm_MPIAIJ, 3478 /*20*/ MatAssemblyBegin_MPIAIJ, 3479 MatAssemblyEnd_MPIAIJ, 3480 MatSetOption_MPIAIJ, 3481 MatZeroEntries_MPIAIJ, 3482 /*24*/ MatZeroRows_MPIAIJ, 3483 0, 3484 #if defined(PETSC_HAVE_PBGL) 3485 0, 3486 #else 3487 0, 3488 #endif 3489 0, 3490 0, 3491 /*29*/ MatSetUp_MPIAIJ, 3492 #if defined(PETSC_HAVE_PBGL) 3493 0, 3494 #else 3495 0, 3496 #endif 3497 0, 3498 0, 3499 0, 3500 /*34*/ MatDuplicate_MPIAIJ, 3501 0, 3502 0, 3503 0, 3504 0, 3505 /*39*/ MatAXPY_MPIAIJ, 3506 MatGetSubMatrices_MPIAIJ, 3507 MatIncreaseOverlap_MPIAIJ, 3508 MatGetValues_MPIAIJ, 3509 MatCopy_MPIAIJ, 3510 /*44*/ MatGetRowMax_MPIAIJ, 3511 MatScale_MPIAIJ, 3512 0, 3513 0, 3514 MatZeroRowsColumns_MPIAIJ, 3515 /*49*/ MatSetRandom_MPIAIJ, 3516 0, 3517 0, 3518 0, 3519 0, 3520 /*54*/ MatFDColoringCreate_MPIAIJ, 3521 0, 3522 MatSetUnfactored_MPIAIJ, 3523 MatPermute_MPIAIJ, 3524 0, 3525 /*59*/ MatGetSubMatrix_MPIAIJ, 3526 MatDestroy_MPIAIJ, 3527 MatView_MPIAIJ, 3528 0, 3529 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3530 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3531 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3532 0, 3533 0, 3534 0, 3535 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3536 MatGetRowMinAbs_MPIAIJ, 3537 0, 3538 MatSetColoring_MPIAIJ, 3539 0, 3540 MatSetValuesAdifor_MPIAIJ, 3541 /*75*/ MatFDColoringApply_AIJ, 3542 0, 3543 0, 3544 0, 3545 MatFindZeroDiagonals_MPIAIJ, 3546 /*80*/ 0, 3547 0, 3548 0, 3549 /*83*/ MatLoad_MPIAIJ, 3550 0, 3551 0, 3552 0, 3553 0, 3554 0, 3555 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3556 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3557 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3558 MatPtAP_MPIAIJ_MPIAIJ, 3559 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3560 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3561 0, 3562 0, 3563 0, 3564 0, 3565 /*99*/ 0, 3566 0, 3567 0, 3568 MatConjugate_MPIAIJ, 3569 0, 3570 /*104*/MatSetValuesRow_MPIAIJ, 3571 MatRealPart_MPIAIJ, 3572 MatImaginaryPart_MPIAIJ, 3573 0, 3574 0, 3575 /*109*/0, 3576 MatGetRedundantMatrix_MPIAIJ, 3577 MatGetRowMin_MPIAIJ, 3578 0, 3579 0, 3580 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3581 0, 3582 0, 3583 0, 3584 0, 3585 /*119*/0, 3586 0, 3587 0, 3588 0, 3589 MatGetMultiProcBlock_MPIAIJ, 3590 /*124*/MatFindNonzeroRows_MPIAIJ, 3591 MatGetColumnNorms_MPIAIJ, 3592 MatInvertBlockDiagonal_MPIAIJ, 3593 0, 3594 MatGetSubMatricesParallel_MPIAIJ, 3595 /*129*/0, 3596 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3597 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3598 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3599 0, 3600 /*134*/0, 3601 0, 3602 0, 3603 0, 3604 0, 3605 /*139*/0, 3606 0 3607 }; 3608 3609 /* ----------------------------------------------------------------------------------------*/ 3610 3611 #undef __FUNCT__ 3612 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3613 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3614 { 3615 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3616 PetscErrorCode ierr; 3617 3618 PetscFunctionBegin; 3619 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3620 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3621 PetscFunctionReturn(0); 3622 } 3623 3624 #undef __FUNCT__ 3625 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3626 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3627 { 3628 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3629 PetscErrorCode ierr; 3630 3631 PetscFunctionBegin; 3632 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3633 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3634 PetscFunctionReturn(0); 3635 } 3636 3637 #undef __FUNCT__ 3638 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3639 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3640 { 3641 Mat_MPIAIJ *b; 3642 PetscErrorCode ierr; 3643 3644 PetscFunctionBegin; 3645 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3646 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3647 b = (Mat_MPIAIJ*)B->data; 3648 3649 if (!B->preallocated) { 3650 /* Explicitly create 2 MATSEQAIJ matrices. */ 3651 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3652 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3653 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3654 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3655 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3656 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3657 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3658 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3659 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3660 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3661 } 3662 3663 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3664 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3665 B->preallocated = PETSC_TRUE; 3666 PetscFunctionReturn(0); 3667 } 3668 3669 #undef __FUNCT__ 3670 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3671 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3672 { 3673 Mat mat; 3674 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3675 PetscErrorCode ierr; 3676 3677 PetscFunctionBegin; 3678 *newmat = 0; 3679 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3680 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3681 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3682 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3683 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3684 a = (Mat_MPIAIJ*)mat->data; 3685 3686 mat->factortype = matin->factortype; 3687 mat->rmap->bs = matin->rmap->bs; 3688 mat->cmap->bs = matin->cmap->bs; 3689 mat->assembled = PETSC_TRUE; 3690 mat->insertmode = NOT_SET_VALUES; 3691 mat->preallocated = PETSC_TRUE; 3692 3693 a->size = oldmat->size; 3694 a->rank = oldmat->rank; 3695 a->donotstash = oldmat->donotstash; 3696 a->roworiented = oldmat->roworiented; 3697 a->rowindices = 0; 3698 a->rowvalues = 0; 3699 a->getrowactive = PETSC_FALSE; 3700 3701 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3702 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3703 3704 if (oldmat->colmap) { 3705 #if defined(PETSC_USE_CTABLE) 3706 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3707 #else 3708 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3709 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3710 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3711 #endif 3712 } else a->colmap = 0; 3713 if (oldmat->garray) { 3714 PetscInt len; 3715 len = oldmat->B->cmap->n; 3716 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3717 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3718 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3719 } else a->garray = 0; 3720 3721 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3722 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3723 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3724 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3725 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3726 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3727 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3728 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3729 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3730 *newmat = mat; 3731 PetscFunctionReturn(0); 3732 } 3733 3734 3735 3736 #undef __FUNCT__ 3737 #define __FUNCT__ "MatLoad_MPIAIJ" 3738 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3739 { 3740 PetscScalar *vals,*svals; 3741 MPI_Comm comm; 3742 PetscErrorCode ierr; 3743 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3744 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3745 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3746 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3747 PetscInt cend,cstart,n,*rowners,sizesset=1; 3748 int fd; 3749 PetscInt bs = 1; 3750 3751 PetscFunctionBegin; 3752 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3753 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3754 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3755 if (!rank) { 3756 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3757 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3758 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3759 } 3760 3761 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3762 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3763 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3764 3765 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3766 3767 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3768 M = header[1]; N = header[2]; 3769 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3770 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3771 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3772 3773 /* If global sizes are set, check if they are consistent with that given in the file */ 3774 if (sizesset) { 3775 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3776 } 3777 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3778 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3779 3780 /* determine ownership of all (block) rows */ 3781 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3782 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3783 else m = newMat->rmap->n; /* Set by user */ 3784 3785 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3786 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3787 3788 /* First process needs enough room for process with most rows */ 3789 if (!rank) { 3790 mmax = rowners[1]; 3791 for (i=2; i<=size; i++) { 3792 mmax = PetscMax(mmax, rowners[i]); 3793 } 3794 } else mmax = -1; /* unused, but compilers complain */ 3795 3796 rowners[0] = 0; 3797 for (i=2; i<=size; i++) { 3798 rowners[i] += rowners[i-1]; 3799 } 3800 rstart = rowners[rank]; 3801 rend = rowners[rank+1]; 3802 3803 /* distribute row lengths to all processors */ 3804 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3805 if (!rank) { 3806 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3807 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3808 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3809 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3810 for (j=0; j<m; j++) { 3811 procsnz[0] += ourlens[j]; 3812 } 3813 for (i=1; i<size; i++) { 3814 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3815 /* calculate the number of nonzeros on each processor */ 3816 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3817 procsnz[i] += rowlengths[j]; 3818 } 3819 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3820 } 3821 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3822 } else { 3823 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3824 } 3825 3826 if (!rank) { 3827 /* determine max buffer needed and allocate it */ 3828 maxnz = 0; 3829 for (i=0; i<size; i++) { 3830 maxnz = PetscMax(maxnz,procsnz[i]); 3831 } 3832 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3833 3834 /* read in my part of the matrix column indices */ 3835 nz = procsnz[0]; 3836 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3837 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3838 3839 /* read in every one elses and ship off */ 3840 for (i=1; i<size; i++) { 3841 nz = procsnz[i]; 3842 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3843 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3844 } 3845 ierr = PetscFree(cols);CHKERRQ(ierr); 3846 } else { 3847 /* determine buffer space needed for message */ 3848 nz = 0; 3849 for (i=0; i<m; i++) { 3850 nz += ourlens[i]; 3851 } 3852 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3853 3854 /* receive message of column indices*/ 3855 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3856 } 3857 3858 /* determine column ownership if matrix is not square */ 3859 if (N != M) { 3860 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3861 else n = newMat->cmap->n; 3862 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3863 cstart = cend - n; 3864 } else { 3865 cstart = rstart; 3866 cend = rend; 3867 n = cend - cstart; 3868 } 3869 3870 /* loop over local rows, determining number of off diagonal entries */ 3871 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3872 jj = 0; 3873 for (i=0; i<m; i++) { 3874 for (j=0; j<ourlens[i]; j++) { 3875 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3876 jj++; 3877 } 3878 } 3879 3880 for (i=0; i<m; i++) { 3881 ourlens[i] -= offlens[i]; 3882 } 3883 if (!sizesset) { 3884 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3885 } 3886 3887 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3888 3889 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3890 3891 for (i=0; i<m; i++) { 3892 ourlens[i] += offlens[i]; 3893 } 3894 3895 if (!rank) { 3896 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3897 3898 /* read in my part of the matrix numerical values */ 3899 nz = procsnz[0]; 3900 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3901 3902 /* insert into matrix */ 3903 jj = rstart; 3904 smycols = mycols; 3905 svals = vals; 3906 for (i=0; i<m; i++) { 3907 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3908 smycols += ourlens[i]; 3909 svals += ourlens[i]; 3910 jj++; 3911 } 3912 3913 /* read in other processors and ship out */ 3914 for (i=1; i<size; i++) { 3915 nz = procsnz[i]; 3916 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3917 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3918 } 3919 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3920 } else { 3921 /* receive numeric values */ 3922 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3923 3924 /* receive message of values*/ 3925 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3926 3927 /* insert into matrix */ 3928 jj = rstart; 3929 smycols = mycols; 3930 svals = vals; 3931 for (i=0; i<m; i++) { 3932 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3933 smycols += ourlens[i]; 3934 svals += ourlens[i]; 3935 jj++; 3936 } 3937 } 3938 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3939 ierr = PetscFree(vals);CHKERRQ(ierr); 3940 ierr = PetscFree(mycols);CHKERRQ(ierr); 3941 ierr = PetscFree(rowners);CHKERRQ(ierr); 3942 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3943 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3944 PetscFunctionReturn(0); 3945 } 3946 3947 #undef __FUNCT__ 3948 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3949 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3950 { 3951 PetscErrorCode ierr; 3952 IS iscol_local; 3953 PetscInt csize; 3954 3955 PetscFunctionBegin; 3956 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3957 if (call == MAT_REUSE_MATRIX) { 3958 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3959 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3960 } else { 3961 PetscInt cbs; 3962 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3963 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3964 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3965 } 3966 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3967 if (call == MAT_INITIAL_MATRIX) { 3968 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3969 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3970 } 3971 PetscFunctionReturn(0); 3972 } 3973 3974 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3975 #undef __FUNCT__ 3976 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3977 /* 3978 Not great since it makes two copies of the submatrix, first an SeqAIJ 3979 in local and then by concatenating the local matrices the end result. 3980 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3981 3982 Note: This requires a sequential iscol with all indices. 3983 */ 3984 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3985 { 3986 PetscErrorCode ierr; 3987 PetscMPIInt rank,size; 3988 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3989 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3990 PetscBool allcolumns, colflag; 3991 Mat M,Mreuse; 3992 MatScalar *vwork,*aa; 3993 MPI_Comm comm; 3994 Mat_SeqAIJ *aij; 3995 3996 PetscFunctionBegin; 3997 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3998 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3999 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4000 4001 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 4002 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 4003 if (colflag && ncol == mat->cmap->N) { 4004 allcolumns = PETSC_TRUE; 4005 } else { 4006 allcolumns = PETSC_FALSE; 4007 } 4008 if (call == MAT_REUSE_MATRIX) { 4009 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 4010 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 4011 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 4012 } else { 4013 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 4014 } 4015 4016 /* 4017 m - number of local rows 4018 n - number of columns (same on all processors) 4019 rstart - first row in new global matrix generated 4020 */ 4021 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 4022 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 4023 if (call == MAT_INITIAL_MATRIX) { 4024 aij = (Mat_SeqAIJ*)(Mreuse)->data; 4025 ii = aij->i; 4026 jj = aij->j; 4027 4028 /* 4029 Determine the number of non-zeros in the diagonal and off-diagonal 4030 portions of the matrix in order to do correct preallocation 4031 */ 4032 4033 /* first get start and end of "diagonal" columns */ 4034 if (csize == PETSC_DECIDE) { 4035 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 4036 if (mglobal == n) { /* square matrix */ 4037 nlocal = m; 4038 } else { 4039 nlocal = n/size + ((n % size) > rank); 4040 } 4041 } else { 4042 nlocal = csize; 4043 } 4044 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4045 rstart = rend - nlocal; 4046 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 4047 4048 /* next, compute all the lengths */ 4049 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 4050 olens = dlens + m; 4051 for (i=0; i<m; i++) { 4052 jend = ii[i+1] - ii[i]; 4053 olen = 0; 4054 dlen = 0; 4055 for (j=0; j<jend; j++) { 4056 if (*jj < rstart || *jj >= rend) olen++; 4057 else dlen++; 4058 jj++; 4059 } 4060 olens[i] = olen; 4061 dlens[i] = dlen; 4062 } 4063 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 4064 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 4065 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 4066 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 4067 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 4068 ierr = PetscFree(dlens);CHKERRQ(ierr); 4069 } else { 4070 PetscInt ml,nl; 4071 4072 M = *newmat; 4073 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 4074 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 4075 ierr = MatZeroEntries(M);CHKERRQ(ierr); 4076 /* 4077 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 4078 rather than the slower MatSetValues(). 4079 */ 4080 M->was_assembled = PETSC_TRUE; 4081 M->assembled = PETSC_FALSE; 4082 } 4083 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 4084 aij = (Mat_SeqAIJ*)(Mreuse)->data; 4085 ii = aij->i; 4086 jj = aij->j; 4087 aa = aij->a; 4088 for (i=0; i<m; i++) { 4089 row = rstart + i; 4090 nz = ii[i+1] - ii[i]; 4091 cwork = jj; jj += nz; 4092 vwork = aa; aa += nz; 4093 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 4094 } 4095 4096 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4097 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4098 *newmat = M; 4099 4100 /* save submatrix used in processor for next request */ 4101 if (call == MAT_INITIAL_MATRIX) { 4102 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 4103 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 4104 } 4105 PetscFunctionReturn(0); 4106 } 4107 4108 #undef __FUNCT__ 4109 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 4110 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4111 { 4112 PetscInt m,cstart, cend,j,nnz,i,d; 4113 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 4114 const PetscInt *JJ; 4115 PetscScalar *values; 4116 PetscErrorCode ierr; 4117 4118 PetscFunctionBegin; 4119 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 4120 4121 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 4122 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 4123 m = B->rmap->n; 4124 cstart = B->cmap->rstart; 4125 cend = B->cmap->rend; 4126 rstart = B->rmap->rstart; 4127 4128 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 4129 4130 #if defined(PETSC_USE_DEBUGGING) 4131 for (i=0; i<m; i++) { 4132 nnz = Ii[i+1]- Ii[i]; 4133 JJ = J + Ii[i]; 4134 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 4135 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 4136 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 4137 } 4138 #endif 4139 4140 for (i=0; i<m; i++) { 4141 nnz = Ii[i+1]- Ii[i]; 4142 JJ = J + Ii[i]; 4143 nnz_max = PetscMax(nnz_max,nnz); 4144 d = 0; 4145 for (j=0; j<nnz; j++) { 4146 if (cstart <= JJ[j] && JJ[j] < cend) d++; 4147 } 4148 d_nnz[i] = d; 4149 o_nnz[i] = nnz - d; 4150 } 4151 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 4152 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 4153 4154 if (v) values = (PetscScalar*)v; 4155 else { 4156 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 4157 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 4158 } 4159 4160 for (i=0; i<m; i++) { 4161 ii = i + rstart; 4162 nnz = Ii[i+1]- Ii[i]; 4163 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 4164 } 4165 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4166 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4167 4168 if (!v) { 4169 ierr = PetscFree(values);CHKERRQ(ierr); 4170 } 4171 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 4172 PetscFunctionReturn(0); 4173 } 4174 4175 #undef __FUNCT__ 4176 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 4177 /*@ 4178 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 4179 (the default parallel PETSc format). 4180 4181 Collective on MPI_Comm 4182 4183 Input Parameters: 4184 + B - the matrix 4185 . i - the indices into j for the start of each local row (starts with zero) 4186 . j - the column indices for each local row (starts with zero) 4187 - v - optional values in the matrix 4188 4189 Level: developer 4190 4191 Notes: 4192 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4193 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4194 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4195 4196 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4197 4198 The format which is used for the sparse matrix input, is equivalent to a 4199 row-major ordering.. i.e for the following matrix, the input data expected is 4200 as shown: 4201 4202 1 0 0 4203 2 0 3 P0 4204 ------- 4205 4 5 6 P1 4206 4207 Process0 [P0]: rows_owned=[0,1] 4208 i = {0,1,3} [size = nrow+1 = 2+1] 4209 j = {0,0,2} [size = nz = 6] 4210 v = {1,2,3} [size = nz = 6] 4211 4212 Process1 [P1]: rows_owned=[2] 4213 i = {0,3} [size = nrow+1 = 1+1] 4214 j = {0,1,2} [size = nz = 6] 4215 v = {4,5,6} [size = nz = 6] 4216 4217 .keywords: matrix, aij, compressed row, sparse, parallel 4218 4219 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 4220 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4221 @*/ 4222 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4223 { 4224 PetscErrorCode ierr; 4225 4226 PetscFunctionBegin; 4227 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4228 PetscFunctionReturn(0); 4229 } 4230 4231 #undef __FUNCT__ 4232 #define __FUNCT__ "MatMPIAIJSetPreallocation" 4233 /*@C 4234 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4235 (the default parallel PETSc format). For good matrix assembly performance 4236 the user should preallocate the matrix storage by setting the parameters 4237 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4238 performance can be increased by more than a factor of 50. 4239 4240 Collective on MPI_Comm 4241 4242 Input Parameters: 4243 + A - the matrix 4244 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4245 (same value is used for all local rows) 4246 . d_nnz - array containing the number of nonzeros in the various rows of the 4247 DIAGONAL portion of the local submatrix (possibly different for each row) 4248 or NULL, if d_nz is used to specify the nonzero structure. 4249 The size of this array is equal to the number of local rows, i.e 'm'. 4250 For matrices that will be factored, you must leave room for (and set) 4251 the diagonal entry even if it is zero. 4252 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4253 submatrix (same value is used for all local rows). 4254 - o_nnz - array containing the number of nonzeros in the various rows of the 4255 OFF-DIAGONAL portion of the local submatrix (possibly different for 4256 each row) or NULL, if o_nz is used to specify the nonzero 4257 structure. The size of this array is equal to the number 4258 of local rows, i.e 'm'. 4259 4260 If the *_nnz parameter is given then the *_nz parameter is ignored 4261 4262 The AIJ format (also called the Yale sparse matrix format or 4263 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4264 storage. The stored row and column indices begin with zero. 4265 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 4266 4267 The parallel matrix is partitioned such that the first m0 rows belong to 4268 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4269 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4270 4271 The DIAGONAL portion of the local submatrix of a processor can be defined 4272 as the submatrix which is obtained by extraction the part corresponding to 4273 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4274 first row that belongs to the processor, r2 is the last row belonging to 4275 the this processor, and c1-c2 is range of indices of the local part of a 4276 vector suitable for applying the matrix to. This is an mxn matrix. In the 4277 common case of a square matrix, the row and column ranges are the same and 4278 the DIAGONAL part is also square. The remaining portion of the local 4279 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4280 4281 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4282 4283 You can call MatGetInfo() to get information on how effective the preallocation was; 4284 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4285 You can also run with the option -info and look for messages with the string 4286 malloc in them to see if additional memory allocation was needed. 4287 4288 Example usage: 4289 4290 Consider the following 8x8 matrix with 34 non-zero values, that is 4291 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4292 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4293 as follows: 4294 4295 .vb 4296 1 2 0 | 0 3 0 | 0 4 4297 Proc0 0 5 6 | 7 0 0 | 8 0 4298 9 0 10 | 11 0 0 | 12 0 4299 ------------------------------------- 4300 13 0 14 | 15 16 17 | 0 0 4301 Proc1 0 18 0 | 19 20 21 | 0 0 4302 0 0 0 | 22 23 0 | 24 0 4303 ------------------------------------- 4304 Proc2 25 26 27 | 0 0 28 | 29 0 4305 30 0 0 | 31 32 33 | 0 34 4306 .ve 4307 4308 This can be represented as a collection of submatrices as: 4309 4310 .vb 4311 A B C 4312 D E F 4313 G H I 4314 .ve 4315 4316 Where the submatrices A,B,C are owned by proc0, D,E,F are 4317 owned by proc1, G,H,I are owned by proc2. 4318 4319 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4320 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4321 The 'M','N' parameters are 8,8, and have the same values on all procs. 4322 4323 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4324 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4325 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4326 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4327 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4328 matrix, ans [DF] as another SeqAIJ matrix. 4329 4330 When d_nz, o_nz parameters are specified, d_nz storage elements are 4331 allocated for every row of the local diagonal submatrix, and o_nz 4332 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4333 One way to choose d_nz and o_nz is to use the max nonzerors per local 4334 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4335 In this case, the values of d_nz,o_nz are: 4336 .vb 4337 proc0 : dnz = 2, o_nz = 2 4338 proc1 : dnz = 3, o_nz = 2 4339 proc2 : dnz = 1, o_nz = 4 4340 .ve 4341 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4342 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4343 for proc3. i.e we are using 12+15+10=37 storage locations to store 4344 34 values. 4345 4346 When d_nnz, o_nnz parameters are specified, the storage is specified 4347 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4348 In the above case the values for d_nnz,o_nnz are: 4349 .vb 4350 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4351 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4352 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4353 .ve 4354 Here the space allocated is sum of all the above values i.e 34, and 4355 hence pre-allocation is perfect. 4356 4357 Level: intermediate 4358 4359 .keywords: matrix, aij, compressed row, sparse, parallel 4360 4361 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4362 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4363 @*/ 4364 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4365 { 4366 PetscErrorCode ierr; 4367 4368 PetscFunctionBegin; 4369 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4370 PetscValidType(B,1); 4371 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4372 PetscFunctionReturn(0); 4373 } 4374 4375 #undef __FUNCT__ 4376 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4377 /*@ 4378 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4379 CSR format the local rows. 4380 4381 Collective on MPI_Comm 4382 4383 Input Parameters: 4384 + comm - MPI communicator 4385 . m - number of local rows (Cannot be PETSC_DECIDE) 4386 . n - This value should be the same as the local size used in creating the 4387 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4388 calculated if N is given) For square matrices n is almost always m. 4389 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4390 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4391 . i - row indices 4392 . j - column indices 4393 - a - matrix values 4394 4395 Output Parameter: 4396 . mat - the matrix 4397 4398 Level: intermediate 4399 4400 Notes: 4401 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4402 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4403 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4404 4405 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4406 4407 The format which is used for the sparse matrix input, is equivalent to a 4408 row-major ordering.. i.e for the following matrix, the input data expected is 4409 as shown: 4410 4411 1 0 0 4412 2 0 3 P0 4413 ------- 4414 4 5 6 P1 4415 4416 Process0 [P0]: rows_owned=[0,1] 4417 i = {0,1,3} [size = nrow+1 = 2+1] 4418 j = {0,0,2} [size = nz = 6] 4419 v = {1,2,3} [size = nz = 6] 4420 4421 Process1 [P1]: rows_owned=[2] 4422 i = {0,3} [size = nrow+1 = 1+1] 4423 j = {0,1,2} [size = nz = 6] 4424 v = {4,5,6} [size = nz = 6] 4425 4426 .keywords: matrix, aij, compressed row, sparse, parallel 4427 4428 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4429 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4430 @*/ 4431 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4432 { 4433 PetscErrorCode ierr; 4434 4435 PetscFunctionBegin; 4436 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4437 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4438 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4439 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4440 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4441 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4442 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4443 PetscFunctionReturn(0); 4444 } 4445 4446 #undef __FUNCT__ 4447 #define __FUNCT__ "MatCreateAIJ" 4448 /*@C 4449 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4450 (the default parallel PETSc format). For good matrix assembly performance 4451 the user should preallocate the matrix storage by setting the parameters 4452 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4453 performance can be increased by more than a factor of 50. 4454 4455 Collective on MPI_Comm 4456 4457 Input Parameters: 4458 + comm - MPI communicator 4459 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4460 This value should be the same as the local size used in creating the 4461 y vector for the matrix-vector product y = Ax. 4462 . n - This value should be the same as the local size used in creating the 4463 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4464 calculated if N is given) For square matrices n is almost always m. 4465 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4466 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4467 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4468 (same value is used for all local rows) 4469 . d_nnz - array containing the number of nonzeros in the various rows of the 4470 DIAGONAL portion of the local submatrix (possibly different for each row) 4471 or NULL, if d_nz is used to specify the nonzero structure. 4472 The size of this array is equal to the number of local rows, i.e 'm'. 4473 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4474 submatrix (same value is used for all local rows). 4475 - o_nnz - array containing the number of nonzeros in the various rows of the 4476 OFF-DIAGONAL portion of the local submatrix (possibly different for 4477 each row) or NULL, if o_nz is used to specify the nonzero 4478 structure. The size of this array is equal to the number 4479 of local rows, i.e 'm'. 4480 4481 Output Parameter: 4482 . A - the matrix 4483 4484 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4485 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4486 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4487 4488 Notes: 4489 If the *_nnz parameter is given then the *_nz parameter is ignored 4490 4491 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4492 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4493 storage requirements for this matrix. 4494 4495 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4496 processor than it must be used on all processors that share the object for 4497 that argument. 4498 4499 The user MUST specify either the local or global matrix dimensions 4500 (possibly both). 4501 4502 The parallel matrix is partitioned across processors such that the 4503 first m0 rows belong to process 0, the next m1 rows belong to 4504 process 1, the next m2 rows belong to process 2 etc.. where 4505 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4506 values corresponding to [m x N] submatrix. 4507 4508 The columns are logically partitioned with the n0 columns belonging 4509 to 0th partition, the next n1 columns belonging to the next 4510 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4511 4512 The DIAGONAL portion of the local submatrix on any given processor 4513 is the submatrix corresponding to the rows and columns m,n 4514 corresponding to the given processor. i.e diagonal matrix on 4515 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4516 etc. The remaining portion of the local submatrix [m x (N-n)] 4517 constitute the OFF-DIAGONAL portion. The example below better 4518 illustrates this concept. 4519 4520 For a square global matrix we define each processor's diagonal portion 4521 to be its local rows and the corresponding columns (a square submatrix); 4522 each processor's off-diagonal portion encompasses the remainder of the 4523 local matrix (a rectangular submatrix). 4524 4525 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4526 4527 When calling this routine with a single process communicator, a matrix of 4528 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4529 type of communicator, use the construction mechanism: 4530 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4531 4532 By default, this format uses inodes (identical nodes) when possible. 4533 We search for consecutive rows with the same nonzero structure, thereby 4534 reusing matrix information to achieve increased efficiency. 4535 4536 Options Database Keys: 4537 + -mat_no_inode - Do not use inodes 4538 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4539 - -mat_aij_oneindex - Internally use indexing starting at 1 4540 rather than 0. Note that when calling MatSetValues(), 4541 the user still MUST index entries starting at 0! 4542 4543 4544 Example usage: 4545 4546 Consider the following 8x8 matrix with 34 non-zero values, that is 4547 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4548 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4549 as follows: 4550 4551 .vb 4552 1 2 0 | 0 3 0 | 0 4 4553 Proc0 0 5 6 | 7 0 0 | 8 0 4554 9 0 10 | 11 0 0 | 12 0 4555 ------------------------------------- 4556 13 0 14 | 15 16 17 | 0 0 4557 Proc1 0 18 0 | 19 20 21 | 0 0 4558 0 0 0 | 22 23 0 | 24 0 4559 ------------------------------------- 4560 Proc2 25 26 27 | 0 0 28 | 29 0 4561 30 0 0 | 31 32 33 | 0 34 4562 .ve 4563 4564 This can be represented as a collection of submatrices as: 4565 4566 .vb 4567 A B C 4568 D E F 4569 G H I 4570 .ve 4571 4572 Where the submatrices A,B,C are owned by proc0, D,E,F are 4573 owned by proc1, G,H,I are owned by proc2. 4574 4575 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4576 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4577 The 'M','N' parameters are 8,8, and have the same values on all procs. 4578 4579 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4580 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4581 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4582 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4583 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4584 matrix, ans [DF] as another SeqAIJ matrix. 4585 4586 When d_nz, o_nz parameters are specified, d_nz storage elements are 4587 allocated for every row of the local diagonal submatrix, and o_nz 4588 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4589 One way to choose d_nz and o_nz is to use the max nonzerors per local 4590 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4591 In this case, the values of d_nz,o_nz are: 4592 .vb 4593 proc0 : dnz = 2, o_nz = 2 4594 proc1 : dnz = 3, o_nz = 2 4595 proc2 : dnz = 1, o_nz = 4 4596 .ve 4597 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4598 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4599 for proc3. i.e we are using 12+15+10=37 storage locations to store 4600 34 values. 4601 4602 When d_nnz, o_nnz parameters are specified, the storage is specified 4603 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4604 In the above case the values for d_nnz,o_nnz are: 4605 .vb 4606 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4607 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4608 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4609 .ve 4610 Here the space allocated is sum of all the above values i.e 34, and 4611 hence pre-allocation is perfect. 4612 4613 Level: intermediate 4614 4615 .keywords: matrix, aij, compressed row, sparse, parallel 4616 4617 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4618 MPIAIJ, MatCreateMPIAIJWithArrays() 4619 @*/ 4620 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4621 { 4622 PetscErrorCode ierr; 4623 PetscMPIInt size; 4624 4625 PetscFunctionBegin; 4626 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4627 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4628 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4629 if (size > 1) { 4630 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4631 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4632 } else { 4633 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4634 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4635 } 4636 PetscFunctionReturn(0); 4637 } 4638 4639 #undef __FUNCT__ 4640 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4641 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4642 { 4643 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4644 4645 PetscFunctionBegin; 4646 *Ad = a->A; 4647 *Ao = a->B; 4648 *colmap = a->garray; 4649 PetscFunctionReturn(0); 4650 } 4651 4652 #undef __FUNCT__ 4653 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4654 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4655 { 4656 PetscErrorCode ierr; 4657 PetscInt i; 4658 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4659 4660 PetscFunctionBegin; 4661 if (coloring->ctype == IS_COLORING_GLOBAL) { 4662 ISColoringValue *allcolors,*colors; 4663 ISColoring ocoloring; 4664 4665 /* set coloring for diagonal portion */ 4666 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4667 4668 /* set coloring for off-diagonal portion */ 4669 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4670 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4671 for (i=0; i<a->B->cmap->n; i++) { 4672 colors[i] = allcolors[a->garray[i]]; 4673 } 4674 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4675 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4676 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4677 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4678 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4679 ISColoringValue *colors; 4680 PetscInt *larray; 4681 ISColoring ocoloring; 4682 4683 /* set coloring for diagonal portion */ 4684 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4685 for (i=0; i<a->A->cmap->n; i++) { 4686 larray[i] = i + A->cmap->rstart; 4687 } 4688 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4689 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4690 for (i=0; i<a->A->cmap->n; i++) { 4691 colors[i] = coloring->colors[larray[i]]; 4692 } 4693 ierr = PetscFree(larray);CHKERRQ(ierr); 4694 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4695 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4696 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4697 4698 /* set coloring for off-diagonal portion */ 4699 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4700 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4701 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4702 for (i=0; i<a->B->cmap->n; i++) { 4703 colors[i] = coloring->colors[larray[i]]; 4704 } 4705 ierr = PetscFree(larray);CHKERRQ(ierr); 4706 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4707 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4708 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4709 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4710 PetscFunctionReturn(0); 4711 } 4712 4713 #undef __FUNCT__ 4714 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4715 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4716 { 4717 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4718 PetscErrorCode ierr; 4719 4720 PetscFunctionBegin; 4721 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4722 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4723 PetscFunctionReturn(0); 4724 } 4725 4726 #undef __FUNCT__ 4727 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4728 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4729 { 4730 PetscErrorCode ierr; 4731 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4732 PetscInt *indx; 4733 4734 PetscFunctionBegin; 4735 /* This routine will ONLY return MPIAIJ type matrix */ 4736 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4737 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4738 if (n == PETSC_DECIDE) { 4739 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4740 } 4741 /* Check sum(n) = N */ 4742 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4743 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4744 4745 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4746 rstart -= m; 4747 4748 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4749 for (i=0; i<m; i++) { 4750 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4751 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4752 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4753 } 4754 4755 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4756 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4757 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4758 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4759 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4760 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4761 PetscFunctionReturn(0); 4762 } 4763 4764 #undef __FUNCT__ 4765 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4766 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4767 { 4768 PetscErrorCode ierr; 4769 PetscInt m,N,i,rstart,nnz,Ii; 4770 PetscInt *indx; 4771 PetscScalar *values; 4772 4773 PetscFunctionBegin; 4774 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4775 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4776 for (i=0; i<m; i++) { 4777 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4778 Ii = i + rstart; 4779 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4780 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4781 } 4782 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4783 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4784 PetscFunctionReturn(0); 4785 } 4786 4787 #undef __FUNCT__ 4788 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4789 /*@ 4790 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4791 matrices from each processor 4792 4793 Collective on MPI_Comm 4794 4795 Input Parameters: 4796 + comm - the communicators the parallel matrix will live on 4797 . inmat - the input sequential matrices 4798 . n - number of local columns (or PETSC_DECIDE) 4799 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4800 4801 Output Parameter: 4802 . outmat - the parallel matrix generated 4803 4804 Level: advanced 4805 4806 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4807 4808 @*/ 4809 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4810 { 4811 PetscErrorCode ierr; 4812 4813 PetscFunctionBegin; 4814 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4815 if (scall == MAT_INITIAL_MATRIX) { 4816 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4817 } 4818 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4819 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4820 PetscFunctionReturn(0); 4821 } 4822 4823 #undef __FUNCT__ 4824 #define __FUNCT__ "MatFileSplit" 4825 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4826 { 4827 PetscErrorCode ierr; 4828 PetscMPIInt rank; 4829 PetscInt m,N,i,rstart,nnz; 4830 size_t len; 4831 const PetscInt *indx; 4832 PetscViewer out; 4833 char *name; 4834 Mat B; 4835 const PetscScalar *values; 4836 4837 PetscFunctionBegin; 4838 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4839 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4840 /* Should this be the type of the diagonal block of A? */ 4841 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4842 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4843 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4844 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4845 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4846 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4847 for (i=0; i<m; i++) { 4848 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4849 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4850 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4851 } 4852 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4853 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4854 4855 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4856 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4857 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4858 sprintf(name,"%s.%d",outfile,rank); 4859 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4860 ierr = PetscFree(name);CHKERRQ(ierr); 4861 ierr = MatView(B,out);CHKERRQ(ierr); 4862 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4863 ierr = MatDestroy(&B);CHKERRQ(ierr); 4864 PetscFunctionReturn(0); 4865 } 4866 4867 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4868 #undef __FUNCT__ 4869 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4870 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4871 { 4872 PetscErrorCode ierr; 4873 Mat_Merge_SeqsToMPI *merge; 4874 PetscContainer container; 4875 4876 PetscFunctionBegin; 4877 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4878 if (container) { 4879 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4880 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4881 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4882 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4883 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4884 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4885 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4886 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4887 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4888 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4889 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4890 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4891 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4892 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4893 ierr = PetscFree(merge);CHKERRQ(ierr); 4894 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4895 } 4896 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4897 PetscFunctionReturn(0); 4898 } 4899 4900 #include <../src/mat/utils/freespace.h> 4901 #include <petscbt.h> 4902 4903 #undef __FUNCT__ 4904 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4905 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4906 { 4907 PetscErrorCode ierr; 4908 MPI_Comm comm; 4909 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4910 PetscMPIInt size,rank,taga,*len_s; 4911 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4912 PetscInt proc,m; 4913 PetscInt **buf_ri,**buf_rj; 4914 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4915 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4916 MPI_Request *s_waits,*r_waits; 4917 MPI_Status *status; 4918 MatScalar *aa=a->a; 4919 MatScalar **abuf_r,*ba_i; 4920 Mat_Merge_SeqsToMPI *merge; 4921 PetscContainer container; 4922 4923 PetscFunctionBegin; 4924 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4925 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4926 4927 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4928 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4929 4930 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4931 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4932 4933 bi = merge->bi; 4934 bj = merge->bj; 4935 buf_ri = merge->buf_ri; 4936 buf_rj = merge->buf_rj; 4937 4938 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4939 owners = merge->rowmap->range; 4940 len_s = merge->len_s; 4941 4942 /* send and recv matrix values */ 4943 /*-----------------------------*/ 4944 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4945 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4946 4947 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4948 for (proc=0,k=0; proc<size; proc++) { 4949 if (!len_s[proc]) continue; 4950 i = owners[proc]; 4951 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4952 k++; 4953 } 4954 4955 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4956 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4957 ierr = PetscFree(status);CHKERRQ(ierr); 4958 4959 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4960 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4961 4962 /* insert mat values of mpimat */ 4963 /*----------------------------*/ 4964 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4965 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4966 4967 for (k=0; k<merge->nrecv; k++) { 4968 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4969 nrows = *(buf_ri_k[k]); 4970 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4971 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4972 } 4973 4974 /* set values of ba */ 4975 m = merge->rowmap->n; 4976 for (i=0; i<m; i++) { 4977 arow = owners[rank] + i; 4978 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4979 bnzi = bi[i+1] - bi[i]; 4980 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4981 4982 /* add local non-zero vals of this proc's seqmat into ba */ 4983 anzi = ai[arow+1] - ai[arow]; 4984 aj = a->j + ai[arow]; 4985 aa = a->a + ai[arow]; 4986 nextaj = 0; 4987 for (j=0; nextaj<anzi; j++) { 4988 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4989 ba_i[j] += aa[nextaj++]; 4990 } 4991 } 4992 4993 /* add received vals into ba */ 4994 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4995 /* i-th row */ 4996 if (i == *nextrow[k]) { 4997 anzi = *(nextai[k]+1) - *nextai[k]; 4998 aj = buf_rj[k] + *(nextai[k]); 4999 aa = abuf_r[k] + *(nextai[k]); 5000 nextaj = 0; 5001 for (j=0; nextaj<anzi; j++) { 5002 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 5003 ba_i[j] += aa[nextaj++]; 5004 } 5005 } 5006 nextrow[k]++; nextai[k]++; 5007 } 5008 } 5009 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 5010 } 5011 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5012 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5013 5014 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 5015 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 5016 ierr = PetscFree(ba_i);CHKERRQ(ierr); 5017 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5018 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 5019 PetscFunctionReturn(0); 5020 } 5021 5022 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 5023 5024 #undef __FUNCT__ 5025 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 5026 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 5027 { 5028 PetscErrorCode ierr; 5029 Mat B_mpi; 5030 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 5031 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 5032 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 5033 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 5034 PetscInt len,proc,*dnz,*onz,bs,cbs; 5035 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 5036 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 5037 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 5038 MPI_Status *status; 5039 PetscFreeSpaceList free_space=NULL,current_space=NULL; 5040 PetscBT lnkbt; 5041 Mat_Merge_SeqsToMPI *merge; 5042 PetscContainer container; 5043 5044 PetscFunctionBegin; 5045 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5046 5047 /* make sure it is a PETSc comm */ 5048 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 5049 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5050 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5051 5052 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 5053 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 5054 5055 /* determine row ownership */ 5056 /*---------------------------------------------------------*/ 5057 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 5058 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 5059 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 5060 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 5061 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 5062 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 5063 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 5064 5065 m = merge->rowmap->n; 5066 owners = merge->rowmap->range; 5067 5068 /* determine the number of messages to send, their lengths */ 5069 /*---------------------------------------------------------*/ 5070 len_s = merge->len_s; 5071 5072 len = 0; /* length of buf_si[] */ 5073 merge->nsend = 0; 5074 for (proc=0; proc<size; proc++) { 5075 len_si[proc] = 0; 5076 if (proc == rank) { 5077 len_s[proc] = 0; 5078 } else { 5079 len_si[proc] = owners[proc+1] - owners[proc] + 1; 5080 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 5081 } 5082 if (len_s[proc]) { 5083 merge->nsend++; 5084 nrows = 0; 5085 for (i=owners[proc]; i<owners[proc+1]; i++) { 5086 if (ai[i+1] > ai[i]) nrows++; 5087 } 5088 len_si[proc] = 2*(nrows+1); 5089 len += len_si[proc]; 5090 } 5091 } 5092 5093 /* determine the number and length of messages to receive for ij-structure */ 5094 /*-------------------------------------------------------------------------*/ 5095 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 5096 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 5097 5098 /* post the Irecv of j-structure */ 5099 /*-------------------------------*/ 5100 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 5101 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 5102 5103 /* post the Isend of j-structure */ 5104 /*--------------------------------*/ 5105 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 5106 5107 for (proc=0, k=0; proc<size; proc++) { 5108 if (!len_s[proc]) continue; 5109 i = owners[proc]; 5110 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 5111 k++; 5112 } 5113 5114 /* receives and sends of j-structure are complete */ 5115 /*------------------------------------------------*/ 5116 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 5117 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 5118 5119 /* send and recv i-structure */ 5120 /*---------------------------*/ 5121 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 5122 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 5123 5124 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 5125 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 5126 for (proc=0,k=0; proc<size; proc++) { 5127 if (!len_s[proc]) continue; 5128 /* form outgoing message for i-structure: 5129 buf_si[0]: nrows to be sent 5130 [1:nrows]: row index (global) 5131 [nrows+1:2*nrows+1]: i-structure index 5132 */ 5133 /*-------------------------------------------*/ 5134 nrows = len_si[proc]/2 - 1; 5135 buf_si_i = buf_si + nrows+1; 5136 buf_si[0] = nrows; 5137 buf_si_i[0] = 0; 5138 nrows = 0; 5139 for (i=owners[proc]; i<owners[proc+1]; i++) { 5140 anzi = ai[i+1] - ai[i]; 5141 if (anzi) { 5142 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 5143 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 5144 nrows++; 5145 } 5146 } 5147 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 5148 k++; 5149 buf_si += len_si[proc]; 5150 } 5151 5152 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 5153 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 5154 5155 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 5156 for (i=0; i<merge->nrecv; i++) { 5157 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 5158 } 5159 5160 ierr = PetscFree(len_si);CHKERRQ(ierr); 5161 ierr = PetscFree(len_ri);CHKERRQ(ierr); 5162 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 5163 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 5164 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 5165 ierr = PetscFree(buf_s);CHKERRQ(ierr); 5166 ierr = PetscFree(status);CHKERRQ(ierr); 5167 5168 /* compute a local seq matrix in each processor */ 5169 /*----------------------------------------------*/ 5170 /* allocate bi array and free space for accumulating nonzero column info */ 5171 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 5172 bi[0] = 0; 5173 5174 /* create and initialize a linked list */ 5175 nlnk = N+1; 5176 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5177 5178 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 5179 len = ai[owners[rank+1]] - ai[owners[rank]]; 5180 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 5181 5182 current_space = free_space; 5183 5184 /* determine symbolic info for each local row */ 5185 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 5186 5187 for (k=0; k<merge->nrecv; k++) { 5188 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5189 nrows = *buf_ri_k[k]; 5190 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5191 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 5192 } 5193 5194 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 5195 len = 0; 5196 for (i=0; i<m; i++) { 5197 bnzi = 0; 5198 /* add local non-zero cols of this proc's seqmat into lnk */ 5199 arow = owners[rank] + i; 5200 anzi = ai[arow+1] - ai[arow]; 5201 aj = a->j + ai[arow]; 5202 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5203 bnzi += nlnk; 5204 /* add received col data into lnk */ 5205 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 5206 if (i == *nextrow[k]) { /* i-th row */ 5207 anzi = *(nextai[k]+1) - *nextai[k]; 5208 aj = buf_rj[k] + *nextai[k]; 5209 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5210 bnzi += nlnk; 5211 nextrow[k]++; nextai[k]++; 5212 } 5213 } 5214 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5215 5216 /* if free space is not available, make more free space */ 5217 if (current_space->local_remaining<bnzi) { 5218 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 5219 nspacedouble++; 5220 } 5221 /* copy data into free space, then initialize lnk */ 5222 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 5223 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 5224 5225 current_space->array += bnzi; 5226 current_space->local_used += bnzi; 5227 current_space->local_remaining -= bnzi; 5228 5229 bi[i+1] = bi[i] + bnzi; 5230 } 5231 5232 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5233 5234 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 5235 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5236 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5237 5238 /* create symbolic parallel matrix B_mpi */ 5239 /*---------------------------------------*/ 5240 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5241 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5242 if (n==PETSC_DECIDE) { 5243 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5244 } else { 5245 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5246 } 5247 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5248 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5249 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5250 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5251 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5252 5253 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5254 B_mpi->assembled = PETSC_FALSE; 5255 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5256 merge->bi = bi; 5257 merge->bj = bj; 5258 merge->buf_ri = buf_ri; 5259 merge->buf_rj = buf_rj; 5260 merge->coi = NULL; 5261 merge->coj = NULL; 5262 merge->owners_co = NULL; 5263 5264 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5265 5266 /* attach the supporting struct to B_mpi for reuse */ 5267 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5268 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5269 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5270 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5271 *mpimat = B_mpi; 5272 5273 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5274 PetscFunctionReturn(0); 5275 } 5276 5277 #undef __FUNCT__ 5278 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 5279 /*@C 5280 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 5281 matrices from each processor 5282 5283 Collective on MPI_Comm 5284 5285 Input Parameters: 5286 + comm - the communicators the parallel matrix will live on 5287 . seqmat - the input sequential matrices 5288 . m - number of local rows (or PETSC_DECIDE) 5289 . n - number of local columns (or PETSC_DECIDE) 5290 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5291 5292 Output Parameter: 5293 . mpimat - the parallel matrix generated 5294 5295 Level: advanced 5296 5297 Notes: 5298 The dimensions of the sequential matrix in each processor MUST be the same. 5299 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5300 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5301 @*/ 5302 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5303 { 5304 PetscErrorCode ierr; 5305 PetscMPIInt size; 5306 5307 PetscFunctionBegin; 5308 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5309 if (size == 1) { 5310 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5311 if (scall == MAT_INITIAL_MATRIX) { 5312 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5313 } else { 5314 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5315 } 5316 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5317 PetscFunctionReturn(0); 5318 } 5319 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5320 if (scall == MAT_INITIAL_MATRIX) { 5321 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5322 } 5323 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5324 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5325 PetscFunctionReturn(0); 5326 } 5327 5328 #undef __FUNCT__ 5329 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5330 /*@ 5331 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 5332 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5333 with MatGetSize() 5334 5335 Not Collective 5336 5337 Input Parameters: 5338 + A - the matrix 5339 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5340 5341 Output Parameter: 5342 . A_loc - the local sequential matrix generated 5343 5344 Level: developer 5345 5346 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5347 5348 @*/ 5349 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5350 { 5351 PetscErrorCode ierr; 5352 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5353 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5354 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5355 MatScalar *aa=a->a,*ba=b->a,*cam; 5356 PetscScalar *ca; 5357 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5358 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5359 PetscBool match; 5360 5361 PetscFunctionBegin; 5362 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5363 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5364 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5365 if (scall == MAT_INITIAL_MATRIX) { 5366 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5367 ci[0] = 0; 5368 for (i=0; i<am; i++) { 5369 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5370 } 5371 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5372 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5373 k = 0; 5374 for (i=0; i<am; i++) { 5375 ncols_o = bi[i+1] - bi[i]; 5376 ncols_d = ai[i+1] - ai[i]; 5377 /* off-diagonal portion of A */ 5378 for (jo=0; jo<ncols_o; jo++) { 5379 col = cmap[*bj]; 5380 if (col >= cstart) break; 5381 cj[k] = col; bj++; 5382 ca[k++] = *ba++; 5383 } 5384 /* diagonal portion of A */ 5385 for (j=0; j<ncols_d; j++) { 5386 cj[k] = cstart + *aj++; 5387 ca[k++] = *aa++; 5388 } 5389 /* off-diagonal portion of A */ 5390 for (j=jo; j<ncols_o; j++) { 5391 cj[k] = cmap[*bj++]; 5392 ca[k++] = *ba++; 5393 } 5394 } 5395 /* put together the new matrix */ 5396 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5397 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5398 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5399 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5400 mat->free_a = PETSC_TRUE; 5401 mat->free_ij = PETSC_TRUE; 5402 mat->nonew = 0; 5403 } else if (scall == MAT_REUSE_MATRIX) { 5404 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5405 ci = mat->i; cj = mat->j; cam = mat->a; 5406 for (i=0; i<am; i++) { 5407 /* off-diagonal portion of A */ 5408 ncols_o = bi[i+1] - bi[i]; 5409 for (jo=0; jo<ncols_o; jo++) { 5410 col = cmap[*bj]; 5411 if (col >= cstart) break; 5412 *cam++ = *ba++; bj++; 5413 } 5414 /* diagonal portion of A */ 5415 ncols_d = ai[i+1] - ai[i]; 5416 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5417 /* off-diagonal portion of A */ 5418 for (j=jo; j<ncols_o; j++) { 5419 *cam++ = *ba++; bj++; 5420 } 5421 } 5422 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5423 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5424 PetscFunctionReturn(0); 5425 } 5426 5427 #undef __FUNCT__ 5428 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5429 /*@C 5430 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5431 5432 Not Collective 5433 5434 Input Parameters: 5435 + A - the matrix 5436 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5437 - row, col - index sets of rows and columns to extract (or NULL) 5438 5439 Output Parameter: 5440 . A_loc - the local sequential matrix generated 5441 5442 Level: developer 5443 5444 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5445 5446 @*/ 5447 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5448 { 5449 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5450 PetscErrorCode ierr; 5451 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5452 IS isrowa,iscola; 5453 Mat *aloc; 5454 PetscBool match; 5455 5456 PetscFunctionBegin; 5457 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5458 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5459 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5460 if (!row) { 5461 start = A->rmap->rstart; end = A->rmap->rend; 5462 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5463 } else { 5464 isrowa = *row; 5465 } 5466 if (!col) { 5467 start = A->cmap->rstart; 5468 cmap = a->garray; 5469 nzA = a->A->cmap->n; 5470 nzB = a->B->cmap->n; 5471 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5472 ncols = 0; 5473 for (i=0; i<nzB; i++) { 5474 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5475 else break; 5476 } 5477 imark = i; 5478 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5479 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5480 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5481 } else { 5482 iscola = *col; 5483 } 5484 if (scall != MAT_INITIAL_MATRIX) { 5485 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5486 aloc[0] = *A_loc; 5487 } 5488 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5489 *A_loc = aloc[0]; 5490 ierr = PetscFree(aloc);CHKERRQ(ierr); 5491 if (!row) { 5492 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5493 } 5494 if (!col) { 5495 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5496 } 5497 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5498 PetscFunctionReturn(0); 5499 } 5500 5501 #undef __FUNCT__ 5502 #define __FUNCT__ "MatGetBrowsOfAcols" 5503 /*@C 5504 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5505 5506 Collective on Mat 5507 5508 Input Parameters: 5509 + A,B - the matrices in mpiaij format 5510 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5511 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5512 5513 Output Parameter: 5514 + rowb, colb - index sets of rows and columns of B to extract 5515 - B_seq - the sequential matrix generated 5516 5517 Level: developer 5518 5519 @*/ 5520 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5521 { 5522 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5523 PetscErrorCode ierr; 5524 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5525 IS isrowb,iscolb; 5526 Mat *bseq=NULL; 5527 5528 PetscFunctionBegin; 5529 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5530 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5531 } 5532 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5533 5534 if (scall == MAT_INITIAL_MATRIX) { 5535 start = A->cmap->rstart; 5536 cmap = a->garray; 5537 nzA = a->A->cmap->n; 5538 nzB = a->B->cmap->n; 5539 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5540 ncols = 0; 5541 for (i=0; i<nzB; i++) { /* row < local row index */ 5542 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5543 else break; 5544 } 5545 imark = i; 5546 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5547 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5548 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5549 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5550 } else { 5551 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5552 isrowb = *rowb; iscolb = *colb; 5553 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5554 bseq[0] = *B_seq; 5555 } 5556 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5557 *B_seq = bseq[0]; 5558 ierr = PetscFree(bseq);CHKERRQ(ierr); 5559 if (!rowb) { 5560 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5561 } else { 5562 *rowb = isrowb; 5563 } 5564 if (!colb) { 5565 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5566 } else { 5567 *colb = iscolb; 5568 } 5569 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5570 PetscFunctionReturn(0); 5571 } 5572 5573 #undef __FUNCT__ 5574 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5575 /* 5576 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5577 of the OFF-DIAGONAL portion of local A 5578 5579 Collective on Mat 5580 5581 Input Parameters: 5582 + A,B - the matrices in mpiaij format 5583 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5584 5585 Output Parameter: 5586 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5587 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5588 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5589 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5590 5591 Level: developer 5592 5593 */ 5594 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5595 { 5596 VecScatter_MPI_General *gen_to,*gen_from; 5597 PetscErrorCode ierr; 5598 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5599 Mat_SeqAIJ *b_oth; 5600 VecScatter ctx =a->Mvctx; 5601 MPI_Comm comm; 5602 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5603 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5604 PetscScalar *rvalues,*svalues; 5605 MatScalar *b_otha,*bufa,*bufA; 5606 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5607 MPI_Request *rwaits = NULL,*swaits = NULL; 5608 MPI_Status *sstatus,rstatus; 5609 PetscMPIInt jj; 5610 PetscInt *cols,sbs,rbs; 5611 PetscScalar *vals; 5612 5613 PetscFunctionBegin; 5614 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5615 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5616 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5617 } 5618 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5619 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5620 5621 gen_to = (VecScatter_MPI_General*)ctx->todata; 5622 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5623 rvalues = gen_from->values; /* holds the length of receiving row */ 5624 svalues = gen_to->values; /* holds the length of sending row */ 5625 nrecvs = gen_from->n; 5626 nsends = gen_to->n; 5627 5628 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5629 srow = gen_to->indices; /* local row index to be sent */ 5630 sstarts = gen_to->starts; 5631 sprocs = gen_to->procs; 5632 sstatus = gen_to->sstatus; 5633 sbs = gen_to->bs; 5634 rstarts = gen_from->starts; 5635 rprocs = gen_from->procs; 5636 rbs = gen_from->bs; 5637 5638 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5639 if (scall == MAT_INITIAL_MATRIX) { 5640 /* i-array */ 5641 /*---------*/ 5642 /* post receives */ 5643 for (i=0; i<nrecvs; i++) { 5644 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5645 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5646 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5647 } 5648 5649 /* pack the outgoing message */ 5650 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5651 5652 sstartsj[0] = 0; 5653 rstartsj[0] = 0; 5654 len = 0; /* total length of j or a array to be sent */ 5655 k = 0; 5656 for (i=0; i<nsends; i++) { 5657 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5658 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5659 for (j=0; j<nrows; j++) { 5660 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5661 for (l=0; l<sbs; l++) { 5662 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5663 5664 rowlen[j*sbs+l] = ncols; 5665 5666 len += ncols; 5667 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5668 } 5669 k++; 5670 } 5671 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5672 5673 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5674 } 5675 /* recvs and sends of i-array are completed */ 5676 i = nrecvs; 5677 while (i--) { 5678 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5679 } 5680 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5681 5682 /* allocate buffers for sending j and a arrays */ 5683 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5684 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5685 5686 /* create i-array of B_oth */ 5687 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5688 5689 b_othi[0] = 0; 5690 len = 0; /* total length of j or a array to be received */ 5691 k = 0; 5692 for (i=0; i<nrecvs; i++) { 5693 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5694 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5695 for (j=0; j<nrows; j++) { 5696 b_othi[k+1] = b_othi[k] + rowlen[j]; 5697 len += rowlen[j]; k++; 5698 } 5699 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5700 } 5701 5702 /* allocate space for j and a arrrays of B_oth */ 5703 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5704 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5705 5706 /* j-array */ 5707 /*---------*/ 5708 /* post receives of j-array */ 5709 for (i=0; i<nrecvs; i++) { 5710 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5711 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5712 } 5713 5714 /* pack the outgoing message j-array */ 5715 k = 0; 5716 for (i=0; i<nsends; i++) { 5717 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5718 bufJ = bufj+sstartsj[i]; 5719 for (j=0; j<nrows; j++) { 5720 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5721 for (ll=0; ll<sbs; ll++) { 5722 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5723 for (l=0; l<ncols; l++) { 5724 *bufJ++ = cols[l]; 5725 } 5726 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5727 } 5728 } 5729 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5730 } 5731 5732 /* recvs and sends of j-array are completed */ 5733 i = nrecvs; 5734 while (i--) { 5735 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5736 } 5737 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5738 } else if (scall == MAT_REUSE_MATRIX) { 5739 sstartsj = *startsj_s; 5740 rstartsj = *startsj_r; 5741 bufa = *bufa_ptr; 5742 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5743 b_otha = b_oth->a; 5744 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5745 5746 /* a-array */ 5747 /*---------*/ 5748 /* post receives of a-array */ 5749 for (i=0; i<nrecvs; i++) { 5750 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5751 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5752 } 5753 5754 /* pack the outgoing message a-array */ 5755 k = 0; 5756 for (i=0; i<nsends; i++) { 5757 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5758 bufA = bufa+sstartsj[i]; 5759 for (j=0; j<nrows; j++) { 5760 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5761 for (ll=0; ll<sbs; ll++) { 5762 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5763 for (l=0; l<ncols; l++) { 5764 *bufA++ = vals[l]; 5765 } 5766 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5767 } 5768 } 5769 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5770 } 5771 /* recvs and sends of a-array are completed */ 5772 i = nrecvs; 5773 while (i--) { 5774 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5775 } 5776 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5777 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5778 5779 if (scall == MAT_INITIAL_MATRIX) { 5780 /* put together the new matrix */ 5781 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5782 5783 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5784 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5785 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5786 b_oth->free_a = PETSC_TRUE; 5787 b_oth->free_ij = PETSC_TRUE; 5788 b_oth->nonew = 0; 5789 5790 ierr = PetscFree(bufj);CHKERRQ(ierr); 5791 if (!startsj_s || !bufa_ptr) { 5792 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5793 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5794 } else { 5795 *startsj_s = sstartsj; 5796 *startsj_r = rstartsj; 5797 *bufa_ptr = bufa; 5798 } 5799 } 5800 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5801 PetscFunctionReturn(0); 5802 } 5803 5804 #undef __FUNCT__ 5805 #define __FUNCT__ "MatGetCommunicationStructs" 5806 /*@C 5807 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5808 5809 Not Collective 5810 5811 Input Parameters: 5812 . A - The matrix in mpiaij format 5813 5814 Output Parameter: 5815 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5816 . colmap - A map from global column index to local index into lvec 5817 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5818 5819 Level: developer 5820 5821 @*/ 5822 #if defined(PETSC_USE_CTABLE) 5823 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5824 #else 5825 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5826 #endif 5827 { 5828 Mat_MPIAIJ *a; 5829 5830 PetscFunctionBegin; 5831 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5832 PetscValidPointer(lvec, 2); 5833 PetscValidPointer(colmap, 3); 5834 PetscValidPointer(multScatter, 4); 5835 a = (Mat_MPIAIJ*) A->data; 5836 if (lvec) *lvec = a->lvec; 5837 if (colmap) *colmap = a->colmap; 5838 if (multScatter) *multScatter = a->Mvctx; 5839 PetscFunctionReturn(0); 5840 } 5841 5842 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5843 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5844 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5845 5846 #undef __FUNCT__ 5847 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5848 /* 5849 Computes (B'*A')' since computing B*A directly is untenable 5850 5851 n p p 5852 ( ) ( ) ( ) 5853 m ( A ) * n ( B ) = m ( C ) 5854 ( ) ( ) ( ) 5855 5856 */ 5857 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5858 { 5859 PetscErrorCode ierr; 5860 Mat At,Bt,Ct; 5861 5862 PetscFunctionBegin; 5863 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5864 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5865 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5866 ierr = MatDestroy(&At);CHKERRQ(ierr); 5867 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5868 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5869 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5870 PetscFunctionReturn(0); 5871 } 5872 5873 #undef __FUNCT__ 5874 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5875 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5876 { 5877 PetscErrorCode ierr; 5878 PetscInt m=A->rmap->n,n=B->cmap->n; 5879 Mat Cmat; 5880 5881 PetscFunctionBegin; 5882 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5883 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5884 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5885 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5886 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5887 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5888 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5889 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5890 5891 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5892 5893 *C = Cmat; 5894 PetscFunctionReturn(0); 5895 } 5896 5897 /* ----------------------------------------------------------------*/ 5898 #undef __FUNCT__ 5899 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5900 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5901 { 5902 PetscErrorCode ierr; 5903 5904 PetscFunctionBegin; 5905 if (scall == MAT_INITIAL_MATRIX) { 5906 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5907 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5908 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5909 } 5910 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5911 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5912 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5913 PetscFunctionReturn(0); 5914 } 5915 5916 #if defined(PETSC_HAVE_MUMPS) 5917 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5918 #endif 5919 #if defined(PETSC_HAVE_PASTIX) 5920 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5921 #endif 5922 #if defined(PETSC_HAVE_SUPERLU_DIST) 5923 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5924 #endif 5925 #if defined(PETSC_HAVE_CLIQUE) 5926 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5927 #endif 5928 5929 /*MC 5930 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5931 5932 Options Database Keys: 5933 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5934 5935 Level: beginner 5936 5937 .seealso: MatCreateAIJ() 5938 M*/ 5939 5940 #undef __FUNCT__ 5941 #define __FUNCT__ "MatCreate_MPIAIJ" 5942 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5943 { 5944 Mat_MPIAIJ *b; 5945 PetscErrorCode ierr; 5946 PetscMPIInt size; 5947 5948 PetscFunctionBegin; 5949 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5950 5951 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5952 B->data = (void*)b; 5953 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5954 B->assembled = PETSC_FALSE; 5955 B->insertmode = NOT_SET_VALUES; 5956 b->size = size; 5957 5958 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5959 5960 /* build cache for off array entries formed */ 5961 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5962 5963 b->donotstash = PETSC_FALSE; 5964 b->colmap = 0; 5965 b->garray = 0; 5966 b->roworiented = PETSC_TRUE; 5967 5968 /* stuff used for matrix vector multiply */ 5969 b->lvec = NULL; 5970 b->Mvctx = NULL; 5971 5972 /* stuff for MatGetRow() */ 5973 b->rowindices = 0; 5974 b->rowvalues = 0; 5975 b->getrowactive = PETSC_FALSE; 5976 5977 /* flexible pointer used in CUSP/CUSPARSE classes */ 5978 b->spptr = NULL; 5979 5980 #if defined(PETSC_HAVE_MUMPS) 5981 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5982 #endif 5983 #if defined(PETSC_HAVE_PASTIX) 5984 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5985 #endif 5986 #if defined(PETSC_HAVE_SUPERLU_DIST) 5987 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5988 #endif 5989 #if defined(PETSC_HAVE_CLIQUE) 5990 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5991 #endif 5992 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5993 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5994 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5995 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5996 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5997 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5998 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5999 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6000 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6001 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6002 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 6003 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 6004 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 6005 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6006 PetscFunctionReturn(0); 6007 } 6008 6009 #undef __FUNCT__ 6010 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 6011 /*@ 6012 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6013 and "off-diagonal" part of the matrix in CSR format. 6014 6015 Collective on MPI_Comm 6016 6017 Input Parameters: 6018 + comm - MPI communicator 6019 . m - number of local rows (Cannot be PETSC_DECIDE) 6020 . n - This value should be the same as the local size used in creating the 6021 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6022 calculated if N is given) For square matrices n is almost always m. 6023 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6024 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6025 . i - row indices for "diagonal" portion of matrix 6026 . j - column indices 6027 . a - matrix values 6028 . oi - row indices for "off-diagonal" portion of matrix 6029 . oj - column indices 6030 - oa - matrix values 6031 6032 Output Parameter: 6033 . mat - the matrix 6034 6035 Level: advanced 6036 6037 Notes: 6038 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6039 must free the arrays once the matrix has been destroyed and not before. 6040 6041 The i and j indices are 0 based 6042 6043 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6044 6045 This sets local rows and cannot be used to set off-processor values. 6046 6047 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6048 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6049 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6050 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6051 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6052 communication if it is known that only local entries will be set. 6053 6054 .keywords: matrix, aij, compressed row, sparse, parallel 6055 6056 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6057 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6058 @*/ 6059 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6060 { 6061 PetscErrorCode ierr; 6062 Mat_MPIAIJ *maij; 6063 6064 PetscFunctionBegin; 6065 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6066 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6067 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6068 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6069 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6070 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6071 maij = (Mat_MPIAIJ*) (*mat)->data; 6072 6073 (*mat)->preallocated = PETSC_TRUE; 6074 6075 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6076 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6077 6078 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6079 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6080 6081 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6082 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6083 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6084 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6085 6086 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6087 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6088 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6089 PetscFunctionReturn(0); 6090 } 6091 6092 /* 6093 Special version for direct calls from Fortran 6094 */ 6095 #include <petsc-private/fortranimpl.h> 6096 6097 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6098 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6099 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6100 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6101 #endif 6102 6103 /* Change these macros so can be used in void function */ 6104 #undef CHKERRQ 6105 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6106 #undef SETERRQ2 6107 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6108 #undef SETERRQ3 6109 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6110 #undef SETERRQ 6111 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6112 6113 #undef __FUNCT__ 6114 #define __FUNCT__ "matsetvaluesmpiaij_" 6115 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6116 { 6117 Mat mat = *mmat; 6118 PetscInt m = *mm, n = *mn; 6119 InsertMode addv = *maddv; 6120 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6121 PetscScalar value; 6122 PetscErrorCode ierr; 6123 6124 MatCheckPreallocated(mat,1); 6125 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6126 6127 #if defined(PETSC_USE_DEBUG) 6128 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6129 #endif 6130 { 6131 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6132 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6133 PetscBool roworiented = aij->roworiented; 6134 6135 /* Some Variables required in the macro */ 6136 Mat A = aij->A; 6137 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6138 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6139 MatScalar *aa = a->a; 6140 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6141 Mat B = aij->B; 6142 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6143 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6144 MatScalar *ba = b->a; 6145 6146 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6147 PetscInt nonew = a->nonew; 6148 MatScalar *ap1,*ap2; 6149 6150 PetscFunctionBegin; 6151 for (i=0; i<m; i++) { 6152 if (im[i] < 0) continue; 6153 #if defined(PETSC_USE_DEBUG) 6154 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6155 #endif 6156 if (im[i] >= rstart && im[i] < rend) { 6157 row = im[i] - rstart; 6158 lastcol1 = -1; 6159 rp1 = aj + ai[row]; 6160 ap1 = aa + ai[row]; 6161 rmax1 = aimax[row]; 6162 nrow1 = ailen[row]; 6163 low1 = 0; 6164 high1 = nrow1; 6165 lastcol2 = -1; 6166 rp2 = bj + bi[row]; 6167 ap2 = ba + bi[row]; 6168 rmax2 = bimax[row]; 6169 nrow2 = bilen[row]; 6170 low2 = 0; 6171 high2 = nrow2; 6172 6173 for (j=0; j<n; j++) { 6174 if (roworiented) value = v[i*n+j]; 6175 else value = v[i+j*m]; 6176 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 6177 if (in[j] >= cstart && in[j] < cend) { 6178 col = in[j] - cstart; 6179 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 6180 } else if (in[j] < 0) continue; 6181 #if defined(PETSC_USE_DEBUG) 6182 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6183 #endif 6184 else { 6185 if (mat->was_assembled) { 6186 if (!aij->colmap) { 6187 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6188 } 6189 #if defined(PETSC_USE_CTABLE) 6190 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6191 col--; 6192 #else 6193 col = aij->colmap[in[j]] - 1; 6194 #endif 6195 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6196 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6197 col = in[j]; 6198 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6199 B = aij->B; 6200 b = (Mat_SeqAIJ*)B->data; 6201 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6202 rp2 = bj + bi[row]; 6203 ap2 = ba + bi[row]; 6204 rmax2 = bimax[row]; 6205 nrow2 = bilen[row]; 6206 low2 = 0; 6207 high2 = nrow2; 6208 bm = aij->B->rmap->n; 6209 ba = b->a; 6210 } 6211 } else col = in[j]; 6212 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 6213 } 6214 } 6215 } else if (!aij->donotstash) { 6216 if (roworiented) { 6217 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6218 } else { 6219 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6220 } 6221 } 6222 } 6223 } 6224 PetscFunctionReturnVoid(); 6225 } 6226 6227