1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 138 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 139 if (type == NORM_2) { 140 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 141 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 142 } 143 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 144 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 145 } 146 } else if (type == NORM_1) { 147 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 148 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 149 } 150 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 151 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 152 } 153 } else if (type == NORM_INFINITY) { 154 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 155 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 156 } 157 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 158 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 159 } 160 161 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 162 if (type == NORM_INFINITY) { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 164 } else { 165 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 166 } 167 ierr = PetscFree(work);CHKERRQ(ierr); 168 if (type == NORM_2) { 169 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "MatDistribute_MPIAIJ" 176 /* 177 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 178 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 179 180 Only for square matrices 181 182 Used by a preconditioner, hence PETSC_EXTERN 183 */ 184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 185 { 186 PetscMPIInt rank,size; 187 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 188 PetscErrorCode ierr; 189 Mat mat; 190 Mat_SeqAIJ *gmata; 191 PetscMPIInt tag; 192 MPI_Status status; 193 PetscBool aij; 194 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 195 196 PetscFunctionBegin; 197 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 199 if (!rank) { 200 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 201 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 202 } 203 if (reuse == MAT_INITIAL_MATRIX) { 204 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 205 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 206 if (!rank) { 207 bses[0] = gmat->rmap->bs; 208 bses[1] = gmat->cmap->bs; 209 } 210 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 211 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 212 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 213 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 214 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 215 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 216 217 rowners[0] = 0; 218 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 219 rstart = rowners[rank]; 220 rend = rowners[rank+1]; 221 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 222 if (!rank) { 223 gmata = (Mat_SeqAIJ*) gmat->data; 224 /* send row lengths to all processors */ 225 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 226 for (i=1; i<size; i++) { 227 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 228 } 229 /* determine number diagonal and off-diagonal counts */ 230 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 231 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 232 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 233 jj = 0; 234 for (i=0; i<m; i++) { 235 for (j=0; j<dlens[i]; j++) { 236 if (gmata->j[jj] < rstart) ld[i]++; 237 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 238 jj++; 239 } 240 } 241 /* send column indices to other processes */ 242 for (i=1; i<size; i++) { 243 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 244 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 245 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 246 } 247 248 /* send numerical values to other processes */ 249 for (i=1; i<size; i++) { 250 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 251 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 252 } 253 gmataa = gmata->a; 254 gmataj = gmata->j; 255 256 } else { 257 /* receive row lengths */ 258 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 /* receive column indices */ 260 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 262 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 263 /* determine number diagonal and off-diagonal counts */ 264 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 266 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 267 jj = 0; 268 for (i=0; i<m; i++) { 269 for (j=0; j<dlens[i]; j++) { 270 if (gmataj[jj] < rstart) ld[i]++; 271 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 272 jj++; 273 } 274 } 275 /* receive numerical values */ 276 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 277 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 278 } 279 /* set preallocation */ 280 for (i=0; i<m; i++) { 281 dlens[i] -= olens[i]; 282 } 283 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 284 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 285 286 for (i=0; i<m; i++) { 287 dlens[i] += olens[i]; 288 } 289 cnt = 0; 290 for (i=0; i<m; i++) { 291 row = rstart + i; 292 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 293 cnt += dlens[i]; 294 } 295 if (rank) { 296 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 297 } 298 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 299 ierr = PetscFree(rowners);CHKERRQ(ierr); 300 301 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 302 303 *inmat = mat; 304 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 307 mat = *inmat; 308 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 309 if (!rank) { 310 /* send numerical values to other processes */ 311 gmata = (Mat_SeqAIJ*) gmat->data; 312 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 313 gmataa = gmata->a; 314 for (i=1; i<size; i++) { 315 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 316 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 317 } 318 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 319 } else { 320 /* receive numerical values from process 0*/ 321 nz = Ad->nz + Ao->nz; 322 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 326 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 327 ad = Ad->a; 328 ao = Ao->a; 329 if (mat->rmap->n) { 330 i = 0; 331 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 for (i=1; i<mat->rmap->n; i++) { 335 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 336 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 337 } 338 i--; 339 if (mat->rmap->n) { 340 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 341 } 342 if (rank) { 343 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 344 } 345 } 346 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 /* 352 Local utility routine that creates a mapping from the global column 353 number to the local number in the off-diagonal part of the local 354 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 355 a slightly higher hash table cost; without it it is not scalable (each processor 356 has an order N integer array but is fast to acess. 357 */ 358 #undef __FUNCT__ 359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 361 { 362 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 363 PetscErrorCode ierr; 364 PetscInt n = aij->B->cmap->n,i; 365 366 PetscFunctionBegin; 367 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 368 #if defined(PETSC_USE_CTABLE) 369 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 370 for (i=0; i<n; i++) { 371 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 372 } 373 #else 374 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 375 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 376 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 377 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 378 #endif 379 PetscFunctionReturn(0); 380 } 381 382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 383 { \ 384 if (col <= lastcol1) low1 = 0; \ 385 else high1 = nrow1; \ 386 lastcol1 = col;\ 387 while (high1-low1 > 5) { \ 388 t = (low1+high1)/2; \ 389 if (rp1[t] > col) high1 = t; \ 390 else low1 = t; \ 391 } \ 392 for (_i=low1; _i<high1; _i++) { \ 393 if (rp1[_i] > col) break; \ 394 if (rp1[_i] == col) { \ 395 if (addv == ADD_VALUES) ap1[_i] += value; \ 396 else ap1[_i] = value; \ 397 goto a_noinsert; \ 398 } \ 399 } \ 400 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 401 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 402 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 403 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 404 N = nrow1++ - 1; a->nz++; high1++; \ 405 /* shift up all the later entries in this row */ \ 406 for (ii=N; ii>=_i; ii--) { \ 407 rp1[ii+1] = rp1[ii]; \ 408 ap1[ii+1] = ap1[ii]; \ 409 } \ 410 rp1[_i] = col; \ 411 ap1[_i] = value; \ 412 a_noinsert: ; \ 413 ailen[row] = nrow1; \ 414 } 415 416 417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 418 { \ 419 if (col <= lastcol2) low2 = 0; \ 420 else high2 = nrow2; \ 421 lastcol2 = col; \ 422 while (high2-low2 > 5) { \ 423 t = (low2+high2)/2; \ 424 if (rp2[t] > col) high2 = t; \ 425 else low2 = t; \ 426 } \ 427 for (_i=low2; _i<high2; _i++) { \ 428 if (rp2[_i] > col) break; \ 429 if (rp2[_i] == col) { \ 430 if (addv == ADD_VALUES) ap2[_i] += value; \ 431 else ap2[_i] = value; \ 432 goto b_noinsert; \ 433 } \ 434 } \ 435 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 436 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 437 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 438 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 439 N = nrow2++ - 1; b->nz++; high2++; \ 440 /* shift up all the later entries in this row */ \ 441 for (ii=N; ii>=_i; ii--) { \ 442 rp2[ii+1] = rp2[ii]; \ 443 ap2[ii+1] = ap2[ii]; \ 444 } \ 445 rp2[_i] = col; \ 446 ap2[_i] = value; \ 447 b_noinsert: ; \ 448 bilen[row] = nrow2; \ 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 454 { 455 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 456 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 457 PetscErrorCode ierr; 458 PetscInt l,*garray = mat->garray,diag; 459 460 PetscFunctionBegin; 461 /* code only works for square matrices A */ 462 463 /* find size of row to the left of the diagonal part */ 464 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 465 row = row - diag; 466 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 467 if (garray[b->j[b->i[row]+l]] > diag) break; 468 } 469 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 470 471 /* diagonal part */ 472 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 473 474 /* right of diagonal part */ 475 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatSetValues_MPIAIJ" 481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 PetscScalar value; 485 PetscErrorCode ierr; 486 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 487 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 488 PetscBool roworiented = aij->roworiented; 489 490 /* Some Variables required in the macro */ 491 Mat A = aij->A; 492 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 493 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 494 MatScalar *aa = a->a; 495 PetscBool ignorezeroentries = a->ignorezeroentries; 496 Mat B = aij->B; 497 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 498 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 499 MatScalar *ba = b->a; 500 501 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 502 PetscInt nonew; 503 MatScalar *ap1,*ap2; 504 505 PetscFunctionBegin; 506 if (v) PetscValidScalarPointer(v,6); 507 for (i=0; i<m; i++) { 508 if (im[i] < 0) continue; 509 #if defined(PETSC_USE_DEBUG) 510 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 511 #endif 512 if (im[i] >= rstart && im[i] < rend) { 513 row = im[i] - rstart; 514 lastcol1 = -1; 515 rp1 = aj + ai[row]; 516 ap1 = aa + ai[row]; 517 rmax1 = aimax[row]; 518 nrow1 = ailen[row]; 519 low1 = 0; 520 high1 = nrow1; 521 lastcol2 = -1; 522 rp2 = bj + bi[row]; 523 ap2 = ba + bi[row]; 524 rmax2 = bimax[row]; 525 nrow2 = bilen[row]; 526 low2 = 0; 527 high2 = nrow2; 528 529 for (j=0; j<n; j++) { 530 if (v) { 531 if (roworiented) value = v[i*n+j]; 532 else value = v[i+j*m]; 533 } else value = 0.0; 534 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 535 if (in[j] >= cstart && in[j] < cend) { 536 col = in[j] - cstart; 537 nonew = a->nonew; 538 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 539 } else if (in[j] < 0) continue; 540 #if defined(PETSC_USE_DEBUG) 541 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 542 #endif 543 else { 544 if (mat->was_assembled) { 545 if (!aij->colmap) { 546 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 547 } 548 #if defined(PETSC_USE_CTABLE) 549 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 550 col--; 551 #else 552 col = aij->colmap[in[j]] - 1; 553 #endif 554 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 555 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 556 col = in[j]; 557 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 558 B = aij->B; 559 b = (Mat_SeqAIJ*)B->data; 560 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 561 rp2 = bj + bi[row]; 562 ap2 = ba + bi[row]; 563 rmax2 = bimax[row]; 564 nrow2 = bilen[row]; 565 low2 = 0; 566 high2 = nrow2; 567 bm = aij->B->rmap->n; 568 ba = b->a; 569 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 570 } else col = in[j]; 571 nonew = b->nonew; 572 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 573 } 574 } 575 } else { 576 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 577 if (!aij->donotstash) { 578 mat->assembled = PETSC_FALSE; 579 if (roworiented) { 580 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 581 } else { 582 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 583 } 584 } 585 } 586 } 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatGetValues_MPIAIJ" 592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 593 { 594 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 595 PetscErrorCode ierr; 596 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 597 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 598 599 PetscFunctionBegin; 600 for (i=0; i<m; i++) { 601 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 602 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 603 if (idxm[i] >= rstart && idxm[i] < rend) { 604 row = idxm[i] - rstart; 605 for (j=0; j<n; j++) { 606 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 607 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 608 if (idxn[j] >= cstart && idxn[j] < cend) { 609 col = idxn[j] - cstart; 610 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 611 } else { 612 if (!aij->colmap) { 613 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 614 } 615 #if defined(PETSC_USE_CTABLE) 616 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 617 col--; 618 #else 619 col = aij->colmap[idxn[j]] - 1; 620 #endif 621 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 622 else { 623 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 624 } 625 } 626 } 627 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 628 } 629 PetscFunctionReturn(0); 630 } 631 632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt nstash,reallocs; 641 InsertMode addv; 642 643 PetscFunctionBegin; 644 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 645 646 /* make sure all processors are either in INSERTMODE or ADDMODE */ 647 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 648 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 649 mat->insertmode = addv; /* in case this processor had no cache */ 650 651 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 652 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 653 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 660 { 661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 663 PetscErrorCode ierr; 664 PetscMPIInt n; 665 PetscInt i,j,rstart,ncols,flg; 666 PetscInt *row,*col; 667 PetscBool other_disassembled; 668 PetscScalar *val; 669 InsertMode addv = mat->insertmode; 670 671 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 672 673 PetscFunctionBegin; 674 if (!aij->donotstash && !mat->nooffprocentries) { 675 while (1) { 676 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 677 if (!flg) break; 678 679 for (i=0; i<n; ) { 680 /* Now identify the consecutive vals belonging to the same row */ 681 for (j=i,rstart=row[j]; j<n; j++) { 682 if (row[j] != rstart) break; 683 } 684 if (j < n) ncols = j-i; 685 else ncols = n-i; 686 /* Now assemble all these values with a single function call */ 687 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 688 689 i = j; 690 } 691 } 692 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 693 } 694 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 695 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 696 697 /* determine if any processor has disassembled, if so we must 698 also disassemble ourselfs, in order that we may reassemble. */ 699 /* 700 if nonzero structure of submatrix B cannot change then we know that 701 no processor disassembled thus we can skip this stuff 702 */ 703 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 704 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 705 if (mat->was_assembled && !other_disassembled) { 706 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 707 } 708 } 709 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 710 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 711 } 712 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 713 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 714 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 715 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 716 717 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 718 719 aij->rowvalues = 0; 720 721 /* used by MatAXPY() */ 722 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 723 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 724 725 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 726 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 733 { 734 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 739 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatZeroRows_MPIAIJ" 745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 746 { 747 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 748 PetscErrorCode ierr; 749 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 750 PetscInt i,*owners = A->rmap->range; 751 PetscInt *nprocs,j,idx,nsends,row; 752 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 753 PetscInt *rvalues,count,base,slen,*source; 754 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 755 MPI_Comm comm; 756 MPI_Request *send_waits,*recv_waits; 757 MPI_Status recv_status,*send_status; 758 const PetscScalar *xx; 759 PetscScalar *bb; 760 #if defined(PETSC_DEBUG) 761 PetscBool found = PETSC_FALSE; 762 #endif 763 764 PetscFunctionBegin; 765 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 766 /* first count number of contributors to each processor */ 767 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 768 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 769 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 770 j = 0; 771 for (i=0; i<N; i++) { 772 if (lastidx > (idx = rows[i])) j = 0; 773 lastidx = idx; 774 for (; j<size; j++) { 775 if (idx >= owners[j] && idx < owners[j+1]) { 776 nprocs[2*j]++; 777 nprocs[2*j+1] = 1; 778 owner[i] = j; 779 #if defined(PETSC_DEBUG) 780 found = PETSC_TRUE; 781 #endif 782 break; 783 } 784 } 785 #if defined(PETSC_DEBUG) 786 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 787 found = PETSC_FALSE; 788 #endif 789 } 790 nsends = 0; 791 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 792 793 if (A->nooffproczerorows) { 794 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 795 nrecvs = nsends; 796 nmax = N; 797 } else { 798 /* inform other processors of number of messages and max length*/ 799 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 800 } 801 802 /* post receives: */ 803 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 804 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 805 for (i=0; i<nrecvs; i++) { 806 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 807 } 808 809 /* do sends: 810 1) starts[i] gives the starting index in svalues for stuff going to 811 the ith processor 812 */ 813 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 814 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 815 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 816 817 starts[0] = 0; 818 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 819 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 820 821 starts[0] = 0; 822 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 823 count = 0; 824 for (i=0; i<size; i++) { 825 if (nprocs[2*i+1]) { 826 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 827 } 828 } 829 ierr = PetscFree(starts);CHKERRQ(ierr); 830 831 base = owners[rank]; 832 833 /* wait on receives */ 834 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 835 count = nrecvs; slen = 0; 836 while (count) { 837 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 838 /* unpack receives into our local space */ 839 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 840 841 source[imdex] = recv_status.MPI_SOURCE; 842 lens[imdex] = n; 843 slen += n; 844 count--; 845 } 846 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 847 848 /* move the data into the send scatter */ 849 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 850 count = 0; 851 for (i=0; i<nrecvs; i++) { 852 values = rvalues + i*nmax; 853 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 854 } 855 ierr = PetscFree(rvalues);CHKERRQ(ierr); 856 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 857 ierr = PetscFree(owner);CHKERRQ(ierr); 858 ierr = PetscFree(nprocs);CHKERRQ(ierr); 859 860 /* fix right hand side if needed */ 861 if (x && b) { 862 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 863 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 864 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 865 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 866 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 867 } 868 /* 869 Zero the required rows. If the "diagonal block" of the matrix 870 is square and the user wishes to set the diagonal we use separate 871 code so that MatSetValues() is not called for each diagonal allocating 872 new memory, thus calling lots of mallocs and slowing things down. 873 874 */ 875 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 876 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 877 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 878 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 879 } else if (diag != 0.0) { 880 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 881 if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 882 for (i = 0; i < slen; i++) { 883 row = lrows[i] + rstart; 884 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 885 } 886 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 887 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 } else { 889 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 890 } 891 ierr = PetscFree(lrows);CHKERRQ(ierr); 892 893 /* wait on sends */ 894 if (nsends) { 895 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 896 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 897 ierr = PetscFree(send_status);CHKERRQ(ierr); 898 } 899 ierr = PetscFree(send_waits);CHKERRQ(ierr); 900 ierr = PetscFree(svalues);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 907 { 908 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 909 PetscErrorCode ierr; 910 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 911 PetscInt i,*owners = A->rmap->range; 912 PetscInt *nprocs,j,idx,nsends; 913 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 914 PetscInt *rvalues,count,base,slen,*source; 915 PetscInt *lens,*lrows,*values,m; 916 MPI_Comm comm; 917 MPI_Request *send_waits,*recv_waits; 918 MPI_Status recv_status,*send_status; 919 const PetscScalar *xx; 920 PetscScalar *bb,*mask; 921 Vec xmask,lmask; 922 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 923 const PetscInt *aj, *ii,*ridx; 924 PetscScalar *aa; 925 #if defined(PETSC_DEBUG) 926 PetscBool found = PETSC_FALSE; 927 #endif 928 929 PetscFunctionBegin; 930 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 931 /* first count number of contributors to each processor */ 932 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 933 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 934 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 935 j = 0; 936 for (i=0; i<N; i++) { 937 if (lastidx > (idx = rows[i])) j = 0; 938 lastidx = idx; 939 for (; j<size; j++) { 940 if (idx >= owners[j] && idx < owners[j+1]) { 941 nprocs[2*j]++; 942 nprocs[2*j+1] = 1; 943 owner[i] = j; 944 #if defined(PETSC_DEBUG) 945 found = PETSC_TRUE; 946 #endif 947 break; 948 } 949 } 950 #if defined(PETSC_DEBUG) 951 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 952 found = PETSC_FALSE; 953 #endif 954 } 955 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 956 957 /* inform other processors of number of messages and max length*/ 958 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 959 960 /* post receives: */ 961 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 962 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 963 for (i=0; i<nrecvs; i++) { 964 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 965 } 966 967 /* do sends: 968 1) starts[i] gives the starting index in svalues for stuff going to 969 the ith processor 970 */ 971 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 972 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 973 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 974 975 starts[0] = 0; 976 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 977 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 978 979 starts[0] = 0; 980 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 981 count = 0; 982 for (i=0; i<size; i++) { 983 if (nprocs[2*i+1]) { 984 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 985 } 986 } 987 ierr = PetscFree(starts);CHKERRQ(ierr); 988 989 base = owners[rank]; 990 991 /* wait on receives */ 992 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 993 count = nrecvs; slen = 0; 994 while (count) { 995 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 996 /* unpack receives into our local space */ 997 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 998 999 source[imdex] = recv_status.MPI_SOURCE; 1000 lens[imdex] = n; 1001 slen += n; 1002 count--; 1003 } 1004 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1005 1006 /* move the data into the send scatter */ 1007 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1008 count = 0; 1009 for (i=0; i<nrecvs; i++) { 1010 values = rvalues + i*nmax; 1011 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 1012 } 1013 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1014 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1015 ierr = PetscFree(owner);CHKERRQ(ierr); 1016 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1017 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1018 1019 /* zero diagonal part of matrix */ 1020 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1021 1022 /* handle off diagonal part of matrix */ 1023 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1024 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1025 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1026 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 1027 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1028 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1031 if (x) { 1032 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1035 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1036 } 1037 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1038 1039 /* remove zeroed rows of off diagonal matrix */ 1040 ii = aij->i; 1041 for (i=0; i<slen; i++) { 1042 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1043 } 1044 1045 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1046 if (aij->compressedrow.use) { 1047 m = aij->compressedrow.nrows; 1048 ii = aij->compressedrow.i; 1049 ridx = aij->compressedrow.rindex; 1050 for (i=0; i<m; i++) { 1051 n = ii[i+1] - ii[i]; 1052 aj = aij->j + ii[i]; 1053 aa = aij->a + ii[i]; 1054 1055 for (j=0; j<n; j++) { 1056 if (PetscAbsScalar(mask[*aj])) { 1057 if (b) bb[*ridx] -= *aa*xx[*aj]; 1058 *aa = 0.0; 1059 } 1060 aa++; 1061 aj++; 1062 } 1063 ridx++; 1064 } 1065 } else { /* do not use compressed row format */ 1066 m = l->B->rmap->n; 1067 for (i=0; i<m; i++) { 1068 n = ii[i+1] - ii[i]; 1069 aj = aij->j + ii[i]; 1070 aa = aij->a + ii[i]; 1071 for (j=0; j<n; j++) { 1072 if (PetscAbsScalar(mask[*aj])) { 1073 if (b) bb[i] -= *aa*xx[*aj]; 1074 *aa = 0.0; 1075 } 1076 aa++; 1077 aj++; 1078 } 1079 } 1080 } 1081 if (x) { 1082 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1083 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1084 } 1085 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1086 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1087 ierr = PetscFree(lrows);CHKERRQ(ierr); 1088 1089 /* wait on sends */ 1090 if (nsends) { 1091 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1092 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1093 ierr = PetscFree(send_status);CHKERRQ(ierr); 1094 } 1095 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1096 ierr = PetscFree(svalues);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatMult_MPIAIJ" 1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1103 { 1104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 PetscInt nt; 1107 1108 PetscFunctionBegin; 1109 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1110 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 PetscBool merged; 1152 1153 PetscFunctionBegin; 1154 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1155 /* do nondiagonal part */ 1156 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1157 if (!merged) { 1158 /* send it on its way */ 1159 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1160 /* do local part */ 1161 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1162 /* receive remote parts: note this assumes the values are not actually */ 1163 /* added in yy until the next line, */ 1164 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1165 } else { 1166 /* do local part */ 1167 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1168 /* send it on its way */ 1169 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1170 /* values actually were received in the Begin() but we need to call this nop */ 1171 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1172 } 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1178 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1179 { 1180 MPI_Comm comm; 1181 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1182 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1183 IS Me,Notme; 1184 PetscErrorCode ierr; 1185 PetscInt M,N,first,last,*notme,i; 1186 PetscMPIInt size; 1187 1188 PetscFunctionBegin; 1189 /* Easy test: symmetric diagonal block */ 1190 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1191 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1192 if (!*f) PetscFunctionReturn(0); 1193 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1194 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1195 if (size == 1) PetscFunctionReturn(0); 1196 1197 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1198 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1199 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1200 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1201 for (i=0; i<first; i++) notme[i] = i; 1202 for (i=last; i<M; i++) notme[i-last+first] = i; 1203 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1204 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1205 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1206 Aoff = Aoffs[0]; 1207 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1208 Boff = Boffs[0]; 1209 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1210 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1211 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1212 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1213 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1214 ierr = PetscFree(notme);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 #undef __FUNCT__ 1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1221 { 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1223 PetscErrorCode ierr; 1224 1225 PetscFunctionBegin; 1226 /* do nondiagonal part */ 1227 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1228 /* send it on its way */ 1229 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1230 /* do local part */ 1231 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1232 /* receive remote parts */ 1233 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 /* 1238 This only works correctly for square matrices where the subblock A->A is the 1239 diagonal block 1240 */ 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1244 { 1245 PetscErrorCode ierr; 1246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1247 1248 PetscFunctionBegin; 1249 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1250 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1251 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatScale_MPIAIJ" 1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1258 { 1259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1260 PetscErrorCode ierr; 1261 1262 PetscFunctionBegin; 1263 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1264 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatDestroy_MPIAIJ" 1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1271 { 1272 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 #if defined(PETSC_USE_LOG) 1277 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1278 #endif 1279 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1280 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1281 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1282 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1283 #if defined(PETSC_USE_CTABLE) 1284 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1285 #else 1286 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1287 #endif 1288 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1289 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1290 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1291 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1292 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1293 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1294 1295 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1310 { 1311 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1312 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1313 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1314 PetscErrorCode ierr; 1315 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1316 int fd; 1317 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1318 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1319 PetscScalar *column_values; 1320 PetscInt message_count,flowcontrolcount; 1321 FILE *file; 1322 1323 PetscFunctionBegin; 1324 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1325 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1326 nz = A->nz + B->nz; 1327 if (!rank) { 1328 header[0] = MAT_FILE_CLASSID; 1329 header[1] = mat->rmap->N; 1330 header[2] = mat->cmap->N; 1331 1332 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1333 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1334 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 /* get largest number of rows any processor has */ 1336 rlen = mat->rmap->n; 1337 range = mat->rmap->range; 1338 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1339 } else { 1340 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1341 rlen = mat->rmap->n; 1342 } 1343 1344 /* load up the local row counts */ 1345 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1346 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1347 1348 /* store the row lengths to the file */ 1349 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1350 if (!rank) { 1351 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1352 for (i=1; i<size; i++) { 1353 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1354 rlen = range[i+1] - range[i]; 1355 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1356 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1357 } 1358 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1359 } else { 1360 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1361 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1362 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1363 } 1364 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1365 1366 /* load up the local column indices */ 1367 nzmax = nz; /* th processor needs space a largest processor needs */ 1368 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1369 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1370 cnt = 0; 1371 for (i=0; i<mat->rmap->n; i++) { 1372 for (j=B->i[i]; j<B->i[i+1]; j++) { 1373 if ((col = garray[B->j[j]]) > cstart) break; 1374 column_indices[cnt++] = col; 1375 } 1376 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1377 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1378 } 1379 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1380 1381 /* store the column indices to the file */ 1382 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1383 if (!rank) { 1384 MPI_Status status; 1385 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1386 for (i=1; i<size; i++) { 1387 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1388 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1389 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1390 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1391 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1392 } 1393 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1394 } else { 1395 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1396 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1397 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1398 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1399 } 1400 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1401 1402 /* load up the local column values */ 1403 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1404 cnt = 0; 1405 for (i=0; i<mat->rmap->n; i++) { 1406 for (j=B->i[i]; j<B->i[i+1]; j++) { 1407 if (garray[B->j[j]] > cstart) break; 1408 column_values[cnt++] = B->a[j]; 1409 } 1410 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1411 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1412 } 1413 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1414 1415 /* store the column values to the file */ 1416 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1417 if (!rank) { 1418 MPI_Status status; 1419 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1420 for (i=1; i<size; i++) { 1421 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1422 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1423 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1424 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1425 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1426 } 1427 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1428 } else { 1429 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1430 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1431 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1432 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1433 } 1434 ierr = PetscFree(column_values);CHKERRQ(ierr); 1435 1436 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1437 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #include <petscdraw.h> 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1445 { 1446 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1447 PetscErrorCode ierr; 1448 PetscMPIInt rank = aij->rank,size = aij->size; 1449 PetscBool isdraw,iascii,isbinary; 1450 PetscViewer sviewer; 1451 PetscViewerFormat format; 1452 1453 PetscFunctionBegin; 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1457 if (iascii) { 1458 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1459 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1460 MatInfo info; 1461 PetscBool inodes; 1462 1463 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1464 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1465 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1467 if (!inodes) { 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1469 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1472 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1473 } 1474 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1475 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1476 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1478 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1480 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1481 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1484 PetscInt inodecount,inodelimit,*inodes; 1485 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1486 if (inodes) { 1487 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1488 } else { 1489 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1490 } 1491 PetscFunctionReturn(0); 1492 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1493 PetscFunctionReturn(0); 1494 } 1495 } else if (isbinary) { 1496 if (size == 1) { 1497 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1499 } else { 1500 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } else if (isdraw) { 1504 PetscDraw draw; 1505 PetscBool isnull; 1506 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1507 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1508 } 1509 1510 if (size == 1) { 1511 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1512 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1513 } else { 1514 /* assemble the entire matrix onto first processor. */ 1515 Mat A; 1516 Mat_SeqAIJ *Aloc; 1517 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1518 MatScalar *a; 1519 1520 if (mat->rmap->N > 1024) { 1521 PetscBool flg = PETSC_FALSE; 1522 1523 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1524 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1525 } 1526 1527 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1528 if (!rank) { 1529 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1530 } else { 1531 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1532 } 1533 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1534 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1535 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1536 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1537 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1538 1539 /* copy over the A part */ 1540 Aloc = (Mat_SeqAIJ*)aij->A->data; 1541 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1542 row = mat->rmap->rstart; 1543 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1544 for (i=0; i<m; i++) { 1545 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1546 row++; 1547 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1548 } 1549 aj = Aloc->j; 1550 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1551 1552 /* copy over the B part */ 1553 Aloc = (Mat_SeqAIJ*)aij->B->data; 1554 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1555 row = mat->rmap->rstart; 1556 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1557 ct = cols; 1558 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1559 for (i=0; i<m; i++) { 1560 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1561 row++; 1562 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1563 } 1564 ierr = PetscFree(ct);CHKERRQ(ierr); 1565 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 /* 1568 Everyone has to call to draw the matrix since the graphics waits are 1569 synchronized across all processors that share the PetscDraw object 1570 */ 1571 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1572 if (!rank) { 1573 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1574 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1575 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1576 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1577 } 1578 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1579 ierr = MatDestroy(&A);CHKERRQ(ierr); 1580 } 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatView_MPIAIJ" 1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1587 { 1588 PetscErrorCode ierr; 1589 PetscBool iascii,isdraw,issocket,isbinary; 1590 1591 PetscFunctionBegin; 1592 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1596 if (iascii || isdraw || isbinary || issocket) { 1597 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MatSOR_MPIAIJ" 1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1605 { 1606 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1607 PetscErrorCode ierr; 1608 Vec bb1 = 0; 1609 PetscBool hasop; 1610 1611 PetscFunctionBegin; 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1618 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1619 } 1620 1621 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1622 if (flag & SOR_ZERO_INITIAL_GUESS) { 1623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1624 its--; 1625 } 1626 1627 while (its--) { 1628 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 1631 /* update rhs: bb1 = bb - B*x */ 1632 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1633 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1634 1635 /* local sweep */ 1636 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1637 } 1638 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1639 if (flag & SOR_ZERO_INITIAL_GUESS) { 1640 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1641 its--; 1642 } 1643 while (its--) { 1644 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 1647 /* update rhs: bb1 = bb - B*x */ 1648 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1649 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1650 1651 /* local sweep */ 1652 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1653 } 1654 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1655 if (flag & SOR_ZERO_INITIAL_GUESS) { 1656 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1657 its--; 1658 } 1659 while (its--) { 1660 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 1663 /* update rhs: bb1 = bb - B*x */ 1664 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1665 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1666 1667 /* local sweep */ 1668 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1669 } 1670 } else if (flag & SOR_EISENSTAT) { 1671 Vec xx1; 1672 1673 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1675 1676 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 if (!mat->diag) { 1679 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1680 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1681 } 1682 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1683 if (hasop) { 1684 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1685 } else { 1686 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1687 } 1688 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1689 1690 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1691 1692 /* local sweep */ 1693 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1694 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1695 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1696 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1697 1698 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatPermute_MPIAIJ" 1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1705 { 1706 Mat aA,aB,Aperm; 1707 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1708 PetscScalar *aa,*ba; 1709 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1710 PetscSF rowsf,sf; 1711 IS parcolp = NULL; 1712 PetscBool done; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1717 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1718 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1719 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1720 1721 /* Invert row permutation to find out where my rows should go */ 1722 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1723 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1724 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1725 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1726 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1727 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1728 1729 /* Invert column permutation to find out where my columns should go */ 1730 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1731 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1732 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1733 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1734 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1735 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1736 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1737 1738 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1739 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1740 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1741 1742 /* Find out where my gcols should go */ 1743 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1744 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1745 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1746 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1747 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1748 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1749 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1750 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1751 1752 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1753 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1756 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1757 for (i=0; i<m; i++) { 1758 PetscInt row = rdest[i],rowner; 1759 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1760 for (j=ai[i]; j<ai[i+1]; j++) { 1761 PetscInt cowner,col = cdest[aj[j]]; 1762 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1763 if (rowner == cowner) dnnz[i]++; 1764 else onnz[i]++; 1765 } 1766 for (j=bi[i]; j<bi[i+1]; j++) { 1767 PetscInt cowner,col = gcdest[bj[j]]; 1768 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1769 if (rowner == cowner) dnnz[i]++; 1770 else onnz[i]++; 1771 } 1772 } 1773 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1774 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1778 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1779 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1780 1781 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1782 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1784 for (i=0; i<m; i++) { 1785 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1786 PetscInt rowlen; 1787 rowlen = ai[i+1] - ai[i]; 1788 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1789 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1790 rowlen = bi[i+1] - bi[i]; 1791 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1792 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1793 } 1794 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1797 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1798 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1799 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1800 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1801 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1802 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1803 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1804 *B = Aperm; 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1811 { 1812 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1813 Mat A = mat->A,B = mat->B; 1814 PetscErrorCode ierr; 1815 PetscReal isend[5],irecv[5]; 1816 1817 PetscFunctionBegin; 1818 info->block_size = 1.0; 1819 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1820 1821 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1822 isend[3] = info->memory; isend[4] = info->mallocs; 1823 1824 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1825 1826 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1827 isend[3] += info->memory; isend[4] += info->mallocs; 1828 if (flag == MAT_LOCAL) { 1829 info->nz_used = isend[0]; 1830 info->nz_allocated = isend[1]; 1831 info->nz_unneeded = isend[2]; 1832 info->memory = isend[3]; 1833 info->mallocs = isend[4]; 1834 } else if (flag == MAT_GLOBAL_MAX) { 1835 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1836 1837 info->nz_used = irecv[0]; 1838 info->nz_allocated = irecv[1]; 1839 info->nz_unneeded = irecv[2]; 1840 info->memory = irecv[3]; 1841 info->mallocs = irecv[4]; 1842 } else if (flag == MAT_GLOBAL_SUM) { 1843 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1844 1845 info->nz_used = irecv[0]; 1846 info->nz_allocated = irecv[1]; 1847 info->nz_unneeded = irecv[2]; 1848 info->memory = irecv[3]; 1849 info->mallocs = irecv[4]; 1850 } 1851 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1852 info->fill_ratio_needed = 0; 1853 info->factor_mallocs = 0; 1854 PetscFunctionReturn(0); 1855 } 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatSetOption_MPIAIJ" 1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1860 { 1861 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 switch (op) { 1866 case MAT_NEW_NONZERO_LOCATIONS: 1867 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1868 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1869 case MAT_KEEP_NONZERO_PATTERN: 1870 case MAT_NEW_NONZERO_LOCATION_ERR: 1871 case MAT_USE_INODES: 1872 case MAT_IGNORE_ZERO_ENTRIES: 1873 MatCheckPreallocated(A,1); 1874 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1875 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1876 break; 1877 case MAT_ROW_ORIENTED: 1878 a->roworiented = flg; 1879 1880 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1881 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1882 break; 1883 case MAT_NEW_DIAGONALS: 1884 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1885 break; 1886 case MAT_IGNORE_OFF_PROC_ENTRIES: 1887 a->donotstash = flg; 1888 break; 1889 case MAT_SPD: 1890 A->spd_set = PETSC_TRUE; 1891 A->spd = flg; 1892 if (flg) { 1893 A->symmetric = PETSC_TRUE; 1894 A->structurally_symmetric = PETSC_TRUE; 1895 A->symmetric_set = PETSC_TRUE; 1896 A->structurally_symmetric_set = PETSC_TRUE; 1897 } 1898 break; 1899 case MAT_SYMMETRIC: 1900 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1901 break; 1902 case MAT_STRUCTURALLY_SYMMETRIC: 1903 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1904 break; 1905 case MAT_HERMITIAN: 1906 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1907 break; 1908 case MAT_SYMMETRY_ETERNAL: 1909 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1910 break; 1911 default: 1912 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatGetRow_MPIAIJ" 1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1920 { 1921 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1922 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1923 PetscErrorCode ierr; 1924 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1925 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1926 PetscInt *cmap,*idx_p; 1927 1928 PetscFunctionBegin; 1929 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1930 mat->getrowactive = PETSC_TRUE; 1931 1932 if (!mat->rowvalues && (idx || v)) { 1933 /* 1934 allocate enough space to hold information from the longest row. 1935 */ 1936 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1937 PetscInt max = 1,tmp; 1938 for (i=0; i<matin->rmap->n; i++) { 1939 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1940 if (max < tmp) max = tmp; 1941 } 1942 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1943 } 1944 1945 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1946 lrow = row - rstart; 1947 1948 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1949 if (!v) {pvA = 0; pvB = 0;} 1950 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1951 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1952 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1953 nztot = nzA + nzB; 1954 1955 cmap = mat->garray; 1956 if (v || idx) { 1957 if (nztot) { 1958 /* Sort by increasing column numbers, assuming A and B already sorted */ 1959 PetscInt imark = -1; 1960 if (v) { 1961 *v = v_p = mat->rowvalues; 1962 for (i=0; i<nzB; i++) { 1963 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1964 else break; 1965 } 1966 imark = i; 1967 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1968 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1969 } 1970 if (idx) { 1971 *idx = idx_p = mat->rowindices; 1972 if (imark > -1) { 1973 for (i=0; i<imark; i++) { 1974 idx_p[i] = cmap[cworkB[i]]; 1975 } 1976 } else { 1977 for (i=0; i<nzB; i++) { 1978 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1979 else break; 1980 } 1981 imark = i; 1982 } 1983 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1984 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1985 } 1986 } else { 1987 if (idx) *idx = 0; 1988 if (v) *v = 0; 1989 } 1990 } 1991 *nz = nztot; 1992 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1993 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2000 { 2001 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2002 2003 PetscFunctionBegin; 2004 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2005 aij->getrowactive = PETSC_FALSE; 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatNorm_MPIAIJ" 2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2012 { 2013 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2014 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2015 PetscErrorCode ierr; 2016 PetscInt i,j,cstart = mat->cmap->rstart; 2017 PetscReal sum = 0.0; 2018 MatScalar *v; 2019 2020 PetscFunctionBegin; 2021 if (aij->size == 1) { 2022 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2023 } else { 2024 if (type == NORM_FROBENIUS) { 2025 v = amat->a; 2026 for (i=0; i<amat->nz; i++) { 2027 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2028 } 2029 v = bmat->a; 2030 for (i=0; i<bmat->nz; i++) { 2031 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2032 } 2033 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2034 *norm = PetscSqrtReal(*norm); 2035 } else if (type == NORM_1) { /* max column norm */ 2036 PetscReal *tmp,*tmp2; 2037 PetscInt *jj,*garray = aij->garray; 2038 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2039 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2040 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2041 *norm = 0.0; 2042 v = amat->a; jj = amat->j; 2043 for (j=0; j<amat->nz; j++) { 2044 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2045 } 2046 v = bmat->a; jj = bmat->j; 2047 for (j=0; j<bmat->nz; j++) { 2048 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2049 } 2050 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2051 for (j=0; j<mat->cmap->N; j++) { 2052 if (tmp2[j] > *norm) *norm = tmp2[j]; 2053 } 2054 ierr = PetscFree(tmp);CHKERRQ(ierr); 2055 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2056 } else if (type == NORM_INFINITY) { /* max row norm */ 2057 PetscReal ntemp = 0.0; 2058 for (j=0; j<aij->A->rmap->n; j++) { 2059 v = amat->a + amat->i[j]; 2060 sum = 0.0; 2061 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2062 sum += PetscAbsScalar(*v); v++; 2063 } 2064 v = bmat->a + bmat->i[j]; 2065 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2066 sum += PetscAbsScalar(*v); v++; 2067 } 2068 if (sum > ntemp) ntemp = sum; 2069 } 2070 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2071 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2072 } 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatTranspose_MPIAIJ" 2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2079 { 2080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2081 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2082 PetscErrorCode ierr; 2083 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2084 PetscInt cstart = A->cmap->rstart,ncol; 2085 Mat B; 2086 MatScalar *array; 2087 2088 PetscFunctionBegin; 2089 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2090 2091 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2092 ai = Aloc->i; aj = Aloc->j; 2093 bi = Bloc->i; bj = Bloc->j; 2094 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2095 PetscInt *d_nnz,*g_nnz,*o_nnz; 2096 PetscSFNode *oloc; 2097 PETSC_UNUSED PetscSF sf; 2098 2099 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2100 /* compute d_nnz for preallocation */ 2101 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2102 for (i=0; i<ai[ma]; i++) { 2103 d_nnz[aj[i]]++; 2104 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2105 } 2106 /* compute local off-diagonal contributions */ 2107 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2108 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2109 /* map those to global */ 2110 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2111 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2112 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2113 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2114 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2115 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2116 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2117 2118 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2119 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2120 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2121 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2122 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2123 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2124 } else { 2125 B = *matout; 2126 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2127 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2128 } 2129 2130 /* copy over the A part */ 2131 array = Aloc->a; 2132 row = A->rmap->rstart; 2133 for (i=0; i<ma; i++) { 2134 ncol = ai[i+1]-ai[i]; 2135 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2136 row++; 2137 array += ncol; aj += ncol; 2138 } 2139 aj = Aloc->j; 2140 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2141 2142 /* copy over the B part */ 2143 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2144 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2145 array = Bloc->a; 2146 row = A->rmap->rstart; 2147 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2148 cols_tmp = cols; 2149 for (i=0; i<mb; i++) { 2150 ncol = bi[i+1]-bi[i]; 2151 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2152 row++; 2153 array += ncol; cols_tmp += ncol; 2154 } 2155 ierr = PetscFree(cols);CHKERRQ(ierr); 2156 2157 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2160 *matout = B; 2161 } else { 2162 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2170 { 2171 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2172 Mat a = aij->A,b = aij->B; 2173 PetscErrorCode ierr; 2174 PetscInt s1,s2,s3; 2175 2176 PetscFunctionBegin; 2177 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2178 if (rr) { 2179 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2180 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2181 /* Overlap communication with computation. */ 2182 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2183 } 2184 if (ll) { 2185 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2186 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2187 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2188 } 2189 /* scale the diagonal block */ 2190 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2191 2192 if (rr) { 2193 /* Do a scatter end and then right scale the off-diagonal block */ 2194 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2195 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2196 } 2197 PetscFunctionReturn(0); 2198 } 2199 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2203 { 2204 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "MatEqual_MPIAIJ" 2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2215 { 2216 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2217 Mat a,b,c,d; 2218 PetscBool flg; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 a = matA->A; b = matA->B; 2223 c = matB->A; d = matB->B; 2224 2225 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2226 if (flg) { 2227 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2228 } 2229 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2230 PetscFunctionReturn(0); 2231 } 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "MatCopy_MPIAIJ" 2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2236 { 2237 PetscErrorCode ierr; 2238 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2239 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2240 2241 PetscFunctionBegin; 2242 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2243 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2244 /* because of the column compression in the off-processor part of the matrix a->B, 2245 the number of columns in a->B and b->B may be different, hence we cannot call 2246 the MatCopy() directly on the two parts. If need be, we can provide a more 2247 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2248 then copying the submatrices */ 2249 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2250 } else { 2251 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2252 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2253 } 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatSetUp_MPIAIJ" 2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2260 { 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2272 { 2273 PetscInt i,m=Y->rmap->N; 2274 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2275 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2276 const PetscInt *xi = x->i,*yi = y->i; 2277 2278 PetscFunctionBegin; 2279 /* Set the number of nonzeros in the new matrix */ 2280 for (i=0; i<m; i++) { 2281 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2282 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2283 nnz[i] = 0; 2284 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2285 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2286 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2287 nnz[i]++; 2288 } 2289 for (; k<nzy; k++) nnz[i]++; 2290 } 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatAXPY_MPIAIJ" 2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2297 { 2298 PetscErrorCode ierr; 2299 PetscInt i; 2300 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2301 PetscBLASInt bnz,one=1; 2302 Mat_SeqAIJ *x,*y; 2303 2304 PetscFunctionBegin; 2305 if (str == SAME_NONZERO_PATTERN) { 2306 PetscScalar alpha = a; 2307 x = (Mat_SeqAIJ*)xx->A->data; 2308 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2309 y = (Mat_SeqAIJ*)yy->A->data; 2310 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2311 x = (Mat_SeqAIJ*)xx->B->data; 2312 y = (Mat_SeqAIJ*)yy->B->data; 2313 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2314 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2315 } else if (str == SUBSET_NONZERO_PATTERN) { 2316 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2317 2318 x = (Mat_SeqAIJ*)xx->B->data; 2319 y = (Mat_SeqAIJ*)yy->B->data; 2320 if (y->xtoy && y->XtoY != xx->B) { 2321 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2322 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2323 } 2324 if (!y->xtoy) { /* get xtoy */ 2325 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2326 y->XtoY = xx->B; 2327 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2328 } 2329 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2330 } else { 2331 Mat B; 2332 PetscInt *nnz_d,*nnz_o; 2333 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2334 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2335 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2336 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2337 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2338 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2339 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2340 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2341 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2342 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2343 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2344 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2345 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2347 } 2348 PetscFunctionReturn(0); 2349 } 2350 2351 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2352 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatConjugate_MPIAIJ" 2355 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2356 { 2357 #if defined(PETSC_USE_COMPLEX) 2358 PetscErrorCode ierr; 2359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2360 2361 PetscFunctionBegin; 2362 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2363 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2364 #else 2365 PetscFunctionBegin; 2366 #endif 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatRealPart_MPIAIJ" 2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2373 { 2374 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2375 PetscErrorCode ierr; 2376 2377 PetscFunctionBegin; 2378 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2379 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2386 { 2387 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2392 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #if defined(PETSC_HAVE_PBGL) 2397 2398 #include <boost/parallel/mpi/bsp_process_group.hpp> 2399 #include <boost/graph/distributed/ilu_default_graph.hpp> 2400 #include <boost/graph/distributed/ilu_0_block.hpp> 2401 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2402 #include <boost/graph/distributed/petsc/interface.hpp> 2403 #include <boost/multi_array.hpp> 2404 #include <boost/parallel/distributed_property_map->hpp> 2405 2406 #undef __FUNCT__ 2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2408 /* 2409 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2410 */ 2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2412 { 2413 namespace petsc = boost::distributed::petsc; 2414 2415 namespace graph_dist = boost::graph::distributed; 2416 using boost::graph::distributed::ilu_default::process_group_type; 2417 using boost::graph::ilu_permuted; 2418 2419 PetscBool row_identity, col_identity; 2420 PetscContainer c; 2421 PetscInt m, n, M, N; 2422 PetscErrorCode ierr; 2423 2424 PetscFunctionBegin; 2425 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2426 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2427 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2428 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2429 2430 process_group_type pg; 2431 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2432 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2433 lgraph_type& level_graph = *lgraph_p; 2434 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2435 2436 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2437 ilu_permuted(level_graph); 2438 2439 /* put together the new matrix */ 2440 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2441 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2442 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2443 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2444 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2445 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2446 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2447 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 2449 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2450 ierr = PetscContainerSetPointer(c, lgraph_p); 2451 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2452 ierr = PetscContainerDestroy(&c); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 #undef __FUNCT__ 2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2459 { 2460 PetscFunctionBegin; 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatSolve_MPIAIJ" 2466 /* 2467 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2468 */ 2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2470 { 2471 namespace graph_dist = boost::graph::distributed; 2472 2473 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2474 lgraph_type *lgraph_p; 2475 PetscContainer c; 2476 PetscErrorCode ierr; 2477 2478 PetscFunctionBegin; 2479 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2480 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2481 ierr = VecCopy(b, x);CHKERRQ(ierr); 2482 2483 PetscScalar *array_x; 2484 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2485 PetscInt sx; 2486 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2487 2488 PetscScalar *array_b; 2489 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2490 PetscInt sb; 2491 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2492 2493 lgraph_type& level_graph = *lgraph_p; 2494 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2495 2496 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2497 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2498 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2499 2500 typedef boost::iterator_property_map<array_ref_type::iterator, 2501 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2502 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2503 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2504 2505 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2506 PetscFunctionReturn(0); 2507 } 2508 #endif 2509 2510 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2511 PetscInt nzlocal,nsends,nrecvs; 2512 PetscMPIInt *send_rank,*recv_rank; 2513 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2514 PetscScalar *sbuf_a,**rbuf_a; 2515 PetscErrorCode (*Destroy)(Mat); 2516 } Mat_Redundant; 2517 2518 #undef __FUNCT__ 2519 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2520 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2521 { 2522 PetscErrorCode ierr; 2523 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2524 PetscInt i; 2525 2526 PetscFunctionBegin; 2527 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2528 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2529 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2530 for (i=0; i<redund->nrecvs; i++) { 2531 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2532 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2533 } 2534 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2535 ierr = PetscFree(redund);CHKERRQ(ierr); 2536 PetscFunctionReturn(0); 2537 } 2538 2539 #undef __FUNCT__ 2540 #define __FUNCT__ "MatDestroy_MatRedundant" 2541 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2542 { 2543 PetscErrorCode ierr; 2544 PetscContainer container; 2545 Mat_Redundant *redund=NULL; 2546 2547 PetscFunctionBegin; 2548 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2549 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2550 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2551 2552 A->ops->destroy = redund->Destroy; 2553 2554 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2555 if (A->ops->destroy) { 2556 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2557 } 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2563 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant) 2564 { 2565 PetscMPIInt rank,size; 2566 MPI_Comm comm,subcomm=psubcomm->comm; 2567 PetscErrorCode ierr; 2568 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,nsubcomm=psubcomm->n; 2569 PetscMPIInt *send_rank= NULL,*recv_rank=NULL; 2570 PetscInt *rowrange = mat->rmap->range; 2571 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2572 Mat A = aij->A,B=aij->B,C=*matredundant; 2573 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2574 PetscScalar *sbuf_a; 2575 PetscInt nzlocal=a->nz+b->nz; 2576 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2577 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N,m,n; 2578 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2579 MatScalar *aworkA,*aworkB; 2580 PetscScalar *vals; 2581 PetscMPIInt tag1,tag2,tag3,imdex; 2582 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2583 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2584 MPI_Status recv_status,*send_status; 2585 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2586 PetscInt **rbuf_j=NULL; 2587 PetscScalar **rbuf_a=NULL; 2588 Mat_Redundant *redund =NULL; 2589 PetscContainer container; 2590 PetscInt rstart_sub,rend_sub,mloc_sub; 2591 2592 PetscFunctionBegin; 2593 ierr = MatGetSize(mat,&M,&N);CHKERRQ(ierr); 2594 if (M != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-square matrix yet, rows %D columns %D",M,N); 2595 2596 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2597 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2598 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2599 2600 if (reuse == MAT_REUSE_MATRIX) { 2601 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2602 ierr = MatGetLocalSize(C,&m,&n);CHKERRQ(ierr); 2603 if (m != mat->rmap->n || n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2604 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2605 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2606 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2607 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2608 2609 nsends = redund->nsends; 2610 nrecvs = redund->nrecvs; 2611 send_rank = redund->send_rank; 2612 recv_rank = redund->recv_rank; 2613 sbuf_nz = redund->sbuf_nz; 2614 rbuf_nz = redund->rbuf_nz; 2615 sbuf_j = redund->sbuf_j; 2616 sbuf_a = redund->sbuf_a; 2617 rbuf_j = redund->rbuf_j; 2618 rbuf_a = redund->rbuf_a; 2619 } 2620 2621 if (reuse == MAT_INITIAL_MATRIX) { 2622 PetscMPIInt subrank,subsize; 2623 PetscInt nleftover,np_subcomm; 2624 2625 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2626 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2627 2628 /* get local size of redundant matrix */ 2629 const PetscInt *range; 2630 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2631 rstart_sub = range[nsubcomm*subrank]; 2632 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2633 rend_sub = range[nsubcomm*(subrank+1)]; 2634 } else { 2635 rend_sub = mat->rmap->N; 2636 } 2637 2638 mloc_sub = rend_sub - rstart_sub; 2639 /* printf("[%d] rstart_sub %d %d, mloc_sub %d\n",rank,rstart_sub,rend_sub, mloc_sub); */ 2640 2641 /* get the destination processors' id send_rank, nsends and nrecvs */ 2642 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2643 2644 np_subcomm = size/nsubcomm; 2645 nleftover = size - nsubcomm*np_subcomm; 2646 2647 nsends = 0; nrecvs = 0; 2648 for (i=0; i<size; i++) { 2649 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2650 send_rank[nsends] = i; nsends++; 2651 recv_rank[nrecvs++] = i; 2652 } 2653 } 2654 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2655 i = size-nleftover-1; 2656 j = 0; 2657 while (j < nsubcomm - nleftover) { 2658 send_rank[nsends++] = i; 2659 i--; j++; 2660 } 2661 } 2662 2663 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2664 for (i=0; i<nleftover; i++) { 2665 recv_rank[nrecvs++] = size-nleftover+i; 2666 } 2667 } 2668 2669 /* allocate sbuf_j, sbuf_a */ 2670 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2671 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2672 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2673 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2674 2675 /* copy mat's local entries into the buffers */ 2676 if (reuse == MAT_INITIAL_MATRIX) { 2677 rownz_max = 0; 2678 rptr = sbuf_j; 2679 cols = sbuf_j + rend-rstart + 1; 2680 vals = sbuf_a; 2681 rptr[0] = 0; 2682 for (i=0; i<rend-rstart; i++) { 2683 row = i + rstart; 2684 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2685 ncols = nzA + nzB; 2686 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2687 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2688 /* load the column indices for this row into cols */ 2689 lwrite = 0; 2690 for (l=0; l<nzB; l++) { 2691 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2692 vals[lwrite] = aworkB[l]; 2693 cols[lwrite++] = ctmp; 2694 } 2695 } 2696 for (l=0; l<nzA; l++) { 2697 vals[lwrite] = aworkA[l]; 2698 cols[lwrite++] = cstart + cworkA[l]; 2699 } 2700 for (l=0; l<nzB; l++) { 2701 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2702 vals[lwrite] = aworkB[l]; 2703 cols[lwrite++] = ctmp; 2704 } 2705 } 2706 vals += ncols; 2707 cols += ncols; 2708 rptr[i+1] = rptr[i] + ncols; 2709 if (rownz_max < ncols) rownz_max = ncols; 2710 } 2711 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2712 } else { /* only copy matrix values into sbuf_a */ 2713 rptr = sbuf_j; 2714 vals = sbuf_a; 2715 rptr[0] = 0; 2716 for (i=0; i<rend-rstart; i++) { 2717 row = i + rstart; 2718 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2719 ncols = nzA + nzB; 2720 cworkB = b->j + b->i[i]; 2721 aworkA = a->a + a->i[i]; 2722 aworkB = b->a + b->i[i]; 2723 lwrite = 0; 2724 for (l=0; l<nzB; l++) { 2725 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2726 } 2727 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2728 for (l=0; l<nzB; l++) { 2729 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2730 } 2731 vals += ncols; 2732 rptr[i+1] = rptr[i] + ncols; 2733 } 2734 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2735 2736 /* send nzlocal to others, and recv other's nzlocal */ 2737 /*--------------------------------------------------*/ 2738 if (reuse == MAT_INITIAL_MATRIX) { 2739 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2740 2741 s_waits2 = s_waits3 + nsends; 2742 s_waits1 = s_waits2 + nsends; 2743 r_waits1 = s_waits1 + nsends; 2744 r_waits2 = r_waits1 + nrecvs; 2745 r_waits3 = r_waits2 + nrecvs; 2746 } else { 2747 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2748 2749 r_waits3 = s_waits3 + nsends; 2750 } 2751 2752 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2753 if (reuse == MAT_INITIAL_MATRIX) { 2754 /* get new tags to keep the communication clean */ 2755 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2756 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2757 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2758 2759 /* post receives of other's nzlocal */ 2760 for (i=0; i<nrecvs; i++) { 2761 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2762 } 2763 /* send nzlocal to others */ 2764 for (i=0; i<nsends; i++) { 2765 sbuf_nz[i] = nzlocal; 2766 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2767 } 2768 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2769 count = nrecvs; 2770 while (count) { 2771 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2772 2773 recv_rank[imdex] = recv_status.MPI_SOURCE; 2774 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2775 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2776 2777 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2778 2779 rbuf_nz[imdex] += i + 2; 2780 2781 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2782 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2783 count--; 2784 } 2785 /* wait on sends of nzlocal */ 2786 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2787 /* send mat->i,j to others, and recv from other's */ 2788 /*------------------------------------------------*/ 2789 for (i=0; i<nsends; i++) { 2790 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2791 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2792 } 2793 /* wait on receives of mat->i,j */ 2794 /*------------------------------*/ 2795 count = nrecvs; 2796 while (count) { 2797 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2798 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2799 count--; 2800 } 2801 /* wait on sends of mat->i,j */ 2802 /*---------------------------*/ 2803 if (nsends) { 2804 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2805 } 2806 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2807 2808 /* post receives, send and receive mat->a */ 2809 /*----------------------------------------*/ 2810 for (imdex=0; imdex<nrecvs; imdex++) { 2811 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2812 } 2813 for (i=0; i<nsends; i++) { 2814 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2815 } 2816 count = nrecvs; 2817 while (count) { 2818 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2819 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2820 count--; 2821 } 2822 if (nsends) { 2823 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2824 } 2825 2826 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2827 2828 /* create redundant matrix */ 2829 /*-------------------------*/ 2830 if (reuse == MAT_INITIAL_MATRIX) { 2831 /* compute rownz_max for preallocation */ 2832 for (imdex=0; imdex<nrecvs; imdex++) { 2833 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2834 rptr = rbuf_j[imdex]; 2835 for (i=0; i<j; i++) { 2836 ncols = rptr[i+1] - rptr[i]; 2837 if (rownz_max < ncols) rownz_max = ncols; 2838 } 2839 } 2840 2841 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2842 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2843 /* ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DETERMINE,mat->cmap->N);CHKERRQ(ierr); -- give incorrect local column! */ 2844 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2845 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2846 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2847 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2848 } else { 2849 C = *matredundant; 2850 } 2851 2852 /* insert local matrix entries */ 2853 rptr = sbuf_j; 2854 cols = sbuf_j + rend-rstart + 1; 2855 vals = sbuf_a; 2856 for (i=0; i<rend-rstart; i++) { 2857 row = i + rstart; 2858 ncols = rptr[i+1] - rptr[i]; 2859 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2860 vals += ncols; 2861 cols += ncols; 2862 } 2863 /* insert received matrix entries */ 2864 for (imdex=0; imdex<nrecvs; imdex++) { 2865 rstart = rowrange[recv_rank[imdex]]; 2866 rend = rowrange[recv_rank[imdex]+1]; 2867 rptr = rbuf_j[imdex]; 2868 cols = rbuf_j[imdex] + rend-rstart + 1; 2869 vals = rbuf_a[imdex]; 2870 for (i=0; i<rend-rstart; i++) { 2871 row = i + rstart; 2872 ncols = rptr[i+1] - rptr[i]; 2873 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2874 vals += ncols; 2875 cols += ncols; 2876 } 2877 } 2878 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2879 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2880 2881 if (reuse == MAT_INITIAL_MATRIX) { 2882 PetscContainer container; 2883 *matredundant = C; 2884 /* create a supporting struct and attach it to C for reuse */ 2885 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2886 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2887 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2888 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2889 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2890 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2891 2892 redund->nzlocal = nzlocal; 2893 redund->nsends = nsends; 2894 redund->nrecvs = nrecvs; 2895 redund->send_rank = send_rank; 2896 redund->recv_rank = recv_rank; 2897 redund->sbuf_nz = sbuf_nz; 2898 redund->rbuf_nz = rbuf_nz; 2899 redund->sbuf_j = sbuf_j; 2900 redund->sbuf_a = sbuf_a; 2901 redund->rbuf_j = rbuf_j; 2902 redund->rbuf_a = rbuf_a; 2903 2904 redund->Destroy = C->ops->destroy; 2905 C->ops->destroy = MatDestroy_MatRedundant; 2906 } 2907 PetscFunctionReturn(0); 2908 } 2909 2910 #undef __FUNCT__ 2911 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2912 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2913 { 2914 PetscMPIInt rank,size; 2915 MPI_Comm comm; 2916 PetscErrorCode ierr; 2917 PetscInt nsends = 0,nrecvs=0,i,rownz_max=0; 2918 PetscMPIInt *send_rank= NULL,*recv_rank=NULL; 2919 PetscInt *rowrange = mat->rmap->range; 2920 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2921 Mat A = aij->A,B=aij->B,C=*matredundant; 2922 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2923 PetscScalar *sbuf_a; 2924 PetscInt nzlocal=a->nz+b->nz; 2925 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2926 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2927 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2928 MatScalar *aworkA,*aworkB; 2929 PetscScalar *vals; 2930 PetscMPIInt tag1,tag2,tag3,imdex; 2931 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2932 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2933 MPI_Status recv_status,*send_status; 2934 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2935 PetscInt **rbuf_j=NULL; 2936 PetscScalar **rbuf_a=NULL; 2937 Mat_Redundant *redund =NULL; 2938 PetscContainer container; 2939 2940 PetscFunctionBegin; 2941 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2942 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2943 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2944 2945 if (reuse == MAT_REUSE_MATRIX) { 2946 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2947 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2948 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2949 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2950 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2951 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2952 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2953 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2954 2955 nsends = redund->nsends; 2956 nrecvs = redund->nrecvs; 2957 send_rank = redund->send_rank; 2958 recv_rank = redund->recv_rank; 2959 sbuf_nz = redund->sbuf_nz; 2960 rbuf_nz = redund->rbuf_nz; 2961 sbuf_j = redund->sbuf_j; 2962 sbuf_a = redund->sbuf_a; 2963 rbuf_j = redund->rbuf_j; 2964 rbuf_a = redund->rbuf_a; 2965 } 2966 2967 if (reuse == MAT_INITIAL_MATRIX) { 2968 PetscMPIInt subrank,subsize; 2969 PetscInt nleftover,np_subcomm; 2970 /* get the destination processors' id send_rank, nsends and nrecvs */ 2971 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2972 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2973 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2974 2975 np_subcomm = size/nsubcomm; 2976 nleftover = size - nsubcomm*np_subcomm; 2977 2978 nsends = 0; nrecvs = 0; 2979 for (i=0; i<size; i++) { /* i=rank*/ 2980 if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */ 2981 send_rank[nsends] = i; nsends++; 2982 recv_rank[nrecvs++] = i; 2983 } 2984 } 2985 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2986 i = size-nleftover-1; 2987 j = 0; 2988 while (j < nsubcomm - nleftover) { 2989 send_rank[nsends++] = i; 2990 i--; j++; 2991 } 2992 } 2993 2994 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2995 for (i=0; i<nleftover; i++) { 2996 recv_rank[nrecvs++] = size-nleftover+i; 2997 } 2998 } 2999 3000 /* allocate sbuf_j, sbuf_a */ 3001 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 3002 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 3003 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 3004 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 3005 3006 /* copy mat's local entries into the buffers */ 3007 if (reuse == MAT_INITIAL_MATRIX) { 3008 rownz_max = 0; 3009 rptr = sbuf_j; 3010 cols = sbuf_j + rend-rstart + 1; 3011 vals = sbuf_a; 3012 rptr[0] = 0; 3013 for (i=0; i<rend-rstart; i++) { 3014 row = i + rstart; 3015 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 3016 ncols = nzA + nzB; 3017 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 3018 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 3019 /* load the column indices for this row into cols */ 3020 lwrite = 0; 3021 for (l=0; l<nzB; l++) { 3022 if ((ctmp = bmap[cworkB[l]]) < cstart) { 3023 vals[lwrite] = aworkB[l]; 3024 cols[lwrite++] = ctmp; 3025 } 3026 } 3027 for (l=0; l<nzA; l++) { 3028 vals[lwrite] = aworkA[l]; 3029 cols[lwrite++] = cstart + cworkA[l]; 3030 } 3031 for (l=0; l<nzB; l++) { 3032 if ((ctmp = bmap[cworkB[l]]) >= cend) { 3033 vals[lwrite] = aworkB[l]; 3034 cols[lwrite++] = ctmp; 3035 } 3036 } 3037 vals += ncols; 3038 cols += ncols; 3039 rptr[i+1] = rptr[i] + ncols; 3040 if (rownz_max < ncols) rownz_max = ncols; 3041 } 3042 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 3043 } else { /* only copy matrix values into sbuf_a */ 3044 rptr = sbuf_j; 3045 vals = sbuf_a; 3046 rptr[0] = 0; 3047 for (i=0; i<rend-rstart; i++) { 3048 row = i + rstart; 3049 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 3050 ncols = nzA + nzB; 3051 cworkB = b->j + b->i[i]; 3052 aworkA = a->a + a->i[i]; 3053 aworkB = b->a + b->i[i]; 3054 lwrite = 0; 3055 for (l=0; l<nzB; l++) { 3056 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 3057 } 3058 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 3059 for (l=0; l<nzB; l++) { 3060 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 3061 } 3062 vals += ncols; 3063 rptr[i+1] = rptr[i] + ncols; 3064 } 3065 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 3066 3067 /* send nzlocal to others, and recv other's nzlocal */ 3068 /*--------------------------------------------------*/ 3069 if (reuse == MAT_INITIAL_MATRIX) { 3070 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 3071 3072 s_waits2 = s_waits3 + nsends; 3073 s_waits1 = s_waits2 + nsends; 3074 r_waits1 = s_waits1 + nsends; 3075 r_waits2 = r_waits1 + nrecvs; 3076 r_waits3 = r_waits2 + nrecvs; 3077 } else { 3078 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 3079 3080 r_waits3 = s_waits3 + nsends; 3081 } 3082 3083 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 3084 if (reuse == MAT_INITIAL_MATRIX) { 3085 /* get new tags to keep the communication clean */ 3086 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 3087 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 3088 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 3089 3090 /* post receives of other's nzlocal */ 3091 for (i=0; i<nrecvs; i++) { 3092 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 3093 } 3094 /* send nzlocal to others */ 3095 for (i=0; i<nsends; i++) { 3096 sbuf_nz[i] = nzlocal; 3097 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 3098 } 3099 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 3100 count = nrecvs; 3101 while (count) { 3102 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 3103 3104 recv_rank[imdex] = recv_status.MPI_SOURCE; 3105 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 3106 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 3107 3108 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 3109 3110 rbuf_nz[imdex] += i + 2; 3111 3112 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 3113 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 3114 count--; 3115 } 3116 /* wait on sends of nzlocal */ 3117 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 3118 /* send mat->i,j to others, and recv from other's */ 3119 /*------------------------------------------------*/ 3120 for (i=0; i<nsends; i++) { 3121 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 3122 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 3123 } 3124 /* wait on receives of mat->i,j */ 3125 /*------------------------------*/ 3126 count = nrecvs; 3127 while (count) { 3128 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 3129 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 3130 count--; 3131 } 3132 /* wait on sends of mat->i,j */ 3133 /*---------------------------*/ 3134 if (nsends) { 3135 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 3136 } 3137 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 3138 3139 /* post receives, send and receive mat->a */ 3140 /*----------------------------------------*/ 3141 for (imdex=0; imdex<nrecvs; imdex++) { 3142 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 3143 } 3144 for (i=0; i<nsends; i++) { 3145 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 3146 } 3147 count = nrecvs; 3148 while (count) { 3149 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 3150 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 3151 count--; 3152 } 3153 if (nsends) { 3154 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 3155 } 3156 3157 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 3158 3159 /* create redundant matrix */ 3160 /*-------------------------*/ 3161 if (reuse == MAT_INITIAL_MATRIX) { 3162 /* compute rownz_max for preallocation */ 3163 for (imdex=0; imdex<nrecvs; imdex++) { 3164 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 3165 rptr = rbuf_j[imdex]; 3166 for (i=0; i<j; i++) { 3167 ncols = rptr[i+1] - rptr[i]; 3168 if (rownz_max < ncols) rownz_max = ncols; 3169 } 3170 } 3171 3172 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 3173 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 3174 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 3175 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 3176 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 3177 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 3178 } else { 3179 C = *matredundant; 3180 } 3181 3182 /* insert local matrix entries */ 3183 rptr = sbuf_j; 3184 cols = sbuf_j + rend-rstart + 1; 3185 vals = sbuf_a; 3186 for (i=0; i<rend-rstart; i++) { 3187 row = i + rstart; 3188 ncols = rptr[i+1] - rptr[i]; 3189 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3190 vals += ncols; 3191 cols += ncols; 3192 } 3193 /* insert received matrix entries */ 3194 for (imdex=0; imdex<nrecvs; imdex++) { 3195 rstart = rowrange[recv_rank[imdex]]; 3196 rend = rowrange[recv_rank[imdex]+1]; 3197 rptr = rbuf_j[imdex]; 3198 cols = rbuf_j[imdex] + rend-rstart + 1; 3199 vals = rbuf_a[imdex]; 3200 for (i=0; i<rend-rstart; i++) { 3201 row = i + rstart; 3202 ncols = rptr[i+1] - rptr[i]; 3203 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3204 vals += ncols; 3205 cols += ncols; 3206 } 3207 } 3208 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3209 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3210 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 3211 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 3212 if (reuse == MAT_INITIAL_MATRIX) { 3213 PetscContainer container; 3214 *matredundant = C; 3215 /* create a supporting struct and attach it to C for reuse */ 3216 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 3217 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3218 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 3219 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 3220 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 3221 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 3222 3223 redund->nzlocal = nzlocal; 3224 redund->nsends = nsends; 3225 redund->nrecvs = nrecvs; 3226 redund->send_rank = send_rank; 3227 redund->recv_rank = recv_rank; 3228 redund->sbuf_nz = sbuf_nz; 3229 redund->rbuf_nz = rbuf_nz; 3230 redund->sbuf_j = sbuf_j; 3231 redund->sbuf_a = sbuf_a; 3232 redund->rbuf_j = rbuf_j; 3233 redund->rbuf_a = rbuf_a; 3234 3235 redund->Destroy = C->ops->destroy; 3236 C->ops->destroy = MatDestroy_MatRedundant; 3237 } 3238 PetscFunctionReturn(0); 3239 } 3240 3241 #undef __FUNCT__ 3242 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 3243 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3244 { 3245 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3246 PetscErrorCode ierr; 3247 PetscInt i,*idxb = 0; 3248 PetscScalar *va,*vb; 3249 Vec vtmp; 3250 3251 PetscFunctionBegin; 3252 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 3253 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3254 if (idx) { 3255 for (i=0; i<A->rmap->n; i++) { 3256 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 3257 } 3258 } 3259 3260 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 3261 if (idx) { 3262 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 3263 } 3264 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 3265 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 3266 3267 for (i=0; i<A->rmap->n; i++) { 3268 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 3269 va[i] = vb[i]; 3270 if (idx) idx[i] = a->garray[idxb[i]]; 3271 } 3272 } 3273 3274 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3275 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3276 ierr = PetscFree(idxb);CHKERRQ(ierr); 3277 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3278 PetscFunctionReturn(0); 3279 } 3280 3281 #undef __FUNCT__ 3282 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 3283 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3284 { 3285 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3286 PetscErrorCode ierr; 3287 PetscInt i,*idxb = 0; 3288 PetscScalar *va,*vb; 3289 Vec vtmp; 3290 3291 PetscFunctionBegin; 3292 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 3293 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3294 if (idx) { 3295 for (i=0; i<A->cmap->n; i++) { 3296 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 3297 } 3298 } 3299 3300 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 3301 if (idx) { 3302 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 3303 } 3304 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 3305 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 3306 3307 for (i=0; i<A->rmap->n; i++) { 3308 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 3309 va[i] = vb[i]; 3310 if (idx) idx[i] = a->garray[idxb[i]]; 3311 } 3312 } 3313 3314 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3315 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3316 ierr = PetscFree(idxb);CHKERRQ(ierr); 3317 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3318 PetscFunctionReturn(0); 3319 } 3320 3321 #undef __FUNCT__ 3322 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 3323 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3324 { 3325 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3326 PetscInt n = A->rmap->n; 3327 PetscInt cstart = A->cmap->rstart; 3328 PetscInt *cmap = mat->garray; 3329 PetscInt *diagIdx, *offdiagIdx; 3330 Vec diagV, offdiagV; 3331 PetscScalar *a, *diagA, *offdiagA; 3332 PetscInt r; 3333 PetscErrorCode ierr; 3334 3335 PetscFunctionBegin; 3336 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3337 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 3338 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 3339 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3340 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3341 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3342 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3343 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3344 for (r = 0; r < n; ++r) { 3345 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3346 a[r] = diagA[r]; 3347 idx[r] = cstart + diagIdx[r]; 3348 } else { 3349 a[r] = offdiagA[r]; 3350 idx[r] = cmap[offdiagIdx[r]]; 3351 } 3352 } 3353 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3354 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3355 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3356 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3357 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3358 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3359 PetscFunctionReturn(0); 3360 } 3361 3362 #undef __FUNCT__ 3363 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3364 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3365 { 3366 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3367 PetscInt n = A->rmap->n; 3368 PetscInt cstart = A->cmap->rstart; 3369 PetscInt *cmap = mat->garray; 3370 PetscInt *diagIdx, *offdiagIdx; 3371 Vec diagV, offdiagV; 3372 PetscScalar *a, *diagA, *offdiagA; 3373 PetscInt r; 3374 PetscErrorCode ierr; 3375 3376 PetscFunctionBegin; 3377 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3378 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3379 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3380 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3381 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3382 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3383 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3384 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3385 for (r = 0; r < n; ++r) { 3386 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3387 a[r] = diagA[r]; 3388 idx[r] = cstart + diagIdx[r]; 3389 } else { 3390 a[r] = offdiagA[r]; 3391 idx[r] = cmap[offdiagIdx[r]]; 3392 } 3393 } 3394 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3395 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3396 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3397 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3398 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3399 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3400 PetscFunctionReturn(0); 3401 } 3402 3403 #undef __FUNCT__ 3404 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3405 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3406 { 3407 PetscErrorCode ierr; 3408 Mat *dummy; 3409 3410 PetscFunctionBegin; 3411 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3412 *newmat = *dummy; 3413 ierr = PetscFree(dummy);CHKERRQ(ierr); 3414 PetscFunctionReturn(0); 3415 } 3416 3417 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3418 3419 #undef __FUNCT__ 3420 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3421 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3422 { 3423 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3424 PetscErrorCode ierr; 3425 3426 PetscFunctionBegin; 3427 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3428 PetscFunctionReturn(0); 3429 } 3430 3431 #undef __FUNCT__ 3432 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3433 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3434 { 3435 PetscErrorCode ierr; 3436 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3437 3438 PetscFunctionBegin; 3439 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3440 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3441 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3442 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3443 PetscFunctionReturn(0); 3444 } 3445 3446 /* -------------------------------------------------------------------*/ 3447 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3448 MatGetRow_MPIAIJ, 3449 MatRestoreRow_MPIAIJ, 3450 MatMult_MPIAIJ, 3451 /* 4*/ MatMultAdd_MPIAIJ, 3452 MatMultTranspose_MPIAIJ, 3453 MatMultTransposeAdd_MPIAIJ, 3454 #if defined(PETSC_HAVE_PBGL) 3455 MatSolve_MPIAIJ, 3456 #else 3457 0, 3458 #endif 3459 0, 3460 0, 3461 /*10*/ 0, 3462 0, 3463 0, 3464 MatSOR_MPIAIJ, 3465 MatTranspose_MPIAIJ, 3466 /*15*/ MatGetInfo_MPIAIJ, 3467 MatEqual_MPIAIJ, 3468 MatGetDiagonal_MPIAIJ, 3469 MatDiagonalScale_MPIAIJ, 3470 MatNorm_MPIAIJ, 3471 /*20*/ MatAssemblyBegin_MPIAIJ, 3472 MatAssemblyEnd_MPIAIJ, 3473 MatSetOption_MPIAIJ, 3474 MatZeroEntries_MPIAIJ, 3475 /*24*/ MatZeroRows_MPIAIJ, 3476 0, 3477 #if defined(PETSC_HAVE_PBGL) 3478 0, 3479 #else 3480 0, 3481 #endif 3482 0, 3483 0, 3484 /*29*/ MatSetUp_MPIAIJ, 3485 #if defined(PETSC_HAVE_PBGL) 3486 0, 3487 #else 3488 0, 3489 #endif 3490 0, 3491 0, 3492 0, 3493 /*34*/ MatDuplicate_MPIAIJ, 3494 0, 3495 0, 3496 0, 3497 0, 3498 /*39*/ MatAXPY_MPIAIJ, 3499 MatGetSubMatrices_MPIAIJ, 3500 MatIncreaseOverlap_MPIAIJ, 3501 MatGetValues_MPIAIJ, 3502 MatCopy_MPIAIJ, 3503 /*44*/ MatGetRowMax_MPIAIJ, 3504 MatScale_MPIAIJ, 3505 0, 3506 0, 3507 MatZeroRowsColumns_MPIAIJ, 3508 /*49*/ MatSetRandom_MPIAIJ, 3509 0, 3510 0, 3511 0, 3512 0, 3513 /*54*/ MatFDColoringCreate_MPIAIJ, 3514 0, 3515 MatSetUnfactored_MPIAIJ, 3516 MatPermute_MPIAIJ, 3517 0, 3518 /*59*/ MatGetSubMatrix_MPIAIJ, 3519 MatDestroy_MPIAIJ, 3520 MatView_MPIAIJ, 3521 0, 3522 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3523 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3524 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3525 0, 3526 0, 3527 0, 3528 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3529 MatGetRowMinAbs_MPIAIJ, 3530 0, 3531 MatSetColoring_MPIAIJ, 3532 0, 3533 MatSetValuesAdifor_MPIAIJ, 3534 /*75*/ MatFDColoringApply_AIJ, 3535 0, 3536 0, 3537 0, 3538 MatFindZeroDiagonals_MPIAIJ, 3539 /*80*/ 0, 3540 0, 3541 0, 3542 /*83*/ MatLoad_MPIAIJ, 3543 0, 3544 0, 3545 0, 3546 0, 3547 0, 3548 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3549 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3550 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3551 MatPtAP_MPIAIJ_MPIAIJ, 3552 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3553 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3554 0, 3555 0, 3556 0, 3557 0, 3558 /*99*/ 0, 3559 0, 3560 0, 3561 MatConjugate_MPIAIJ, 3562 0, 3563 /*104*/MatSetValuesRow_MPIAIJ, 3564 MatRealPart_MPIAIJ, 3565 MatImaginaryPart_MPIAIJ, 3566 0, 3567 0, 3568 /*109*/0, 3569 MatGetRedundantMatrix_MPIAIJ, 3570 MatGetRowMin_MPIAIJ, 3571 0, 3572 0, 3573 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3574 0, 3575 0, 3576 0, 3577 0, 3578 /*119*/0, 3579 0, 3580 0, 3581 0, 3582 MatGetMultiProcBlock_MPIAIJ, 3583 /*124*/MatFindNonzeroRows_MPIAIJ, 3584 MatGetColumnNorms_MPIAIJ, 3585 MatInvertBlockDiagonal_MPIAIJ, 3586 0, 3587 MatGetSubMatricesParallel_MPIAIJ, 3588 /*129*/0, 3589 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3590 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3591 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3592 0, 3593 /*134*/0, 3594 0, 3595 0, 3596 0, 3597 0, 3598 /*139*/0, 3599 0 3600 }; 3601 3602 /* ----------------------------------------------------------------------------------------*/ 3603 3604 #undef __FUNCT__ 3605 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3606 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3607 { 3608 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3609 PetscErrorCode ierr; 3610 3611 PetscFunctionBegin; 3612 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3613 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3614 PetscFunctionReturn(0); 3615 } 3616 3617 #undef __FUNCT__ 3618 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3619 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3620 { 3621 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3622 PetscErrorCode ierr; 3623 3624 PetscFunctionBegin; 3625 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3626 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3627 PetscFunctionReturn(0); 3628 } 3629 3630 #undef __FUNCT__ 3631 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3632 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3633 { 3634 Mat_MPIAIJ *b; 3635 PetscErrorCode ierr; 3636 3637 PetscFunctionBegin; 3638 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3639 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3640 b = (Mat_MPIAIJ*)B->data; 3641 3642 if (!B->preallocated) { 3643 /* Explicitly create 2 MATSEQAIJ matrices. */ 3644 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3645 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3646 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3647 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3648 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3649 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3650 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3651 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3652 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3653 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3654 } 3655 3656 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3657 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3658 B->preallocated = PETSC_TRUE; 3659 PetscFunctionReturn(0); 3660 } 3661 3662 #undef __FUNCT__ 3663 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3664 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3665 { 3666 Mat mat; 3667 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3668 PetscErrorCode ierr; 3669 3670 PetscFunctionBegin; 3671 *newmat = 0; 3672 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3673 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3674 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3675 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3676 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3677 a = (Mat_MPIAIJ*)mat->data; 3678 3679 mat->factortype = matin->factortype; 3680 mat->rmap->bs = matin->rmap->bs; 3681 mat->cmap->bs = matin->cmap->bs; 3682 mat->assembled = PETSC_TRUE; 3683 mat->insertmode = NOT_SET_VALUES; 3684 mat->preallocated = PETSC_TRUE; 3685 3686 a->size = oldmat->size; 3687 a->rank = oldmat->rank; 3688 a->donotstash = oldmat->donotstash; 3689 a->roworiented = oldmat->roworiented; 3690 a->rowindices = 0; 3691 a->rowvalues = 0; 3692 a->getrowactive = PETSC_FALSE; 3693 3694 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3695 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3696 3697 if (oldmat->colmap) { 3698 #if defined(PETSC_USE_CTABLE) 3699 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3700 #else 3701 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3702 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3703 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3704 #endif 3705 } else a->colmap = 0; 3706 if (oldmat->garray) { 3707 PetscInt len; 3708 len = oldmat->B->cmap->n; 3709 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3710 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3711 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3712 } else a->garray = 0; 3713 3714 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3715 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3716 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3717 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3718 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3719 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3720 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3721 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3722 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3723 *newmat = mat; 3724 PetscFunctionReturn(0); 3725 } 3726 3727 3728 3729 #undef __FUNCT__ 3730 #define __FUNCT__ "MatLoad_MPIAIJ" 3731 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3732 { 3733 PetscScalar *vals,*svals; 3734 MPI_Comm comm; 3735 PetscErrorCode ierr; 3736 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3737 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3738 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3739 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3740 PetscInt cend,cstart,n,*rowners,sizesset=1; 3741 int fd; 3742 PetscInt bs = 1; 3743 3744 PetscFunctionBegin; 3745 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3746 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3747 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3748 if (!rank) { 3749 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3750 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3751 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3752 } 3753 3754 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3755 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3756 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3757 3758 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3759 3760 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3761 M = header[1]; N = header[2]; 3762 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3763 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3764 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3765 3766 /* If global sizes are set, check if they are consistent with that given in the file */ 3767 if (sizesset) { 3768 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3769 } 3770 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3771 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3772 3773 /* determine ownership of all (block) rows */ 3774 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3775 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3776 else m = newMat->rmap->n; /* Set by user */ 3777 3778 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3779 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3780 3781 /* First process needs enough room for process with most rows */ 3782 if (!rank) { 3783 mmax = rowners[1]; 3784 for (i=2; i<=size; i++) { 3785 mmax = PetscMax(mmax, rowners[i]); 3786 } 3787 } else mmax = -1; /* unused, but compilers complain */ 3788 3789 rowners[0] = 0; 3790 for (i=2; i<=size; i++) { 3791 rowners[i] += rowners[i-1]; 3792 } 3793 rstart = rowners[rank]; 3794 rend = rowners[rank+1]; 3795 3796 /* distribute row lengths to all processors */ 3797 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3798 if (!rank) { 3799 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3800 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3801 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3802 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3803 for (j=0; j<m; j++) { 3804 procsnz[0] += ourlens[j]; 3805 } 3806 for (i=1; i<size; i++) { 3807 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3808 /* calculate the number of nonzeros on each processor */ 3809 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3810 procsnz[i] += rowlengths[j]; 3811 } 3812 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3813 } 3814 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3815 } else { 3816 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3817 } 3818 3819 if (!rank) { 3820 /* determine max buffer needed and allocate it */ 3821 maxnz = 0; 3822 for (i=0; i<size; i++) { 3823 maxnz = PetscMax(maxnz,procsnz[i]); 3824 } 3825 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3826 3827 /* read in my part of the matrix column indices */ 3828 nz = procsnz[0]; 3829 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3830 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3831 3832 /* read in every one elses and ship off */ 3833 for (i=1; i<size; i++) { 3834 nz = procsnz[i]; 3835 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3836 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3837 } 3838 ierr = PetscFree(cols);CHKERRQ(ierr); 3839 } else { 3840 /* determine buffer space needed for message */ 3841 nz = 0; 3842 for (i=0; i<m; i++) { 3843 nz += ourlens[i]; 3844 } 3845 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3846 3847 /* receive message of column indices*/ 3848 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3849 } 3850 3851 /* determine column ownership if matrix is not square */ 3852 if (N != M) { 3853 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3854 else n = newMat->cmap->n; 3855 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3856 cstart = cend - n; 3857 } else { 3858 cstart = rstart; 3859 cend = rend; 3860 n = cend - cstart; 3861 } 3862 3863 /* loop over local rows, determining number of off diagonal entries */ 3864 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3865 jj = 0; 3866 for (i=0; i<m; i++) { 3867 for (j=0; j<ourlens[i]; j++) { 3868 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3869 jj++; 3870 } 3871 } 3872 3873 for (i=0; i<m; i++) { 3874 ourlens[i] -= offlens[i]; 3875 } 3876 if (!sizesset) { 3877 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3878 } 3879 3880 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3881 3882 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3883 3884 for (i=0; i<m; i++) { 3885 ourlens[i] += offlens[i]; 3886 } 3887 3888 if (!rank) { 3889 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3890 3891 /* read in my part of the matrix numerical values */ 3892 nz = procsnz[0]; 3893 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3894 3895 /* insert into matrix */ 3896 jj = rstart; 3897 smycols = mycols; 3898 svals = vals; 3899 for (i=0; i<m; i++) { 3900 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3901 smycols += ourlens[i]; 3902 svals += ourlens[i]; 3903 jj++; 3904 } 3905 3906 /* read in other processors and ship out */ 3907 for (i=1; i<size; i++) { 3908 nz = procsnz[i]; 3909 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3910 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3911 } 3912 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3913 } else { 3914 /* receive numeric values */ 3915 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3916 3917 /* receive message of values*/ 3918 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3919 3920 /* insert into matrix */ 3921 jj = rstart; 3922 smycols = mycols; 3923 svals = vals; 3924 for (i=0; i<m; i++) { 3925 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3926 smycols += ourlens[i]; 3927 svals += ourlens[i]; 3928 jj++; 3929 } 3930 } 3931 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3932 ierr = PetscFree(vals);CHKERRQ(ierr); 3933 ierr = PetscFree(mycols);CHKERRQ(ierr); 3934 ierr = PetscFree(rowners);CHKERRQ(ierr); 3935 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3936 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3937 PetscFunctionReturn(0); 3938 } 3939 3940 #undef __FUNCT__ 3941 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3942 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3943 { 3944 PetscErrorCode ierr; 3945 IS iscol_local; 3946 PetscInt csize; 3947 3948 PetscFunctionBegin; 3949 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3950 if (call == MAT_REUSE_MATRIX) { 3951 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3952 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3953 } else { 3954 PetscInt cbs; 3955 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3956 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3957 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3958 } 3959 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3960 if (call == MAT_INITIAL_MATRIX) { 3961 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3962 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3963 } 3964 PetscFunctionReturn(0); 3965 } 3966 3967 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3968 #undef __FUNCT__ 3969 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3970 /* 3971 Not great since it makes two copies of the submatrix, first an SeqAIJ 3972 in local and then by concatenating the local matrices the end result. 3973 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3974 3975 Note: This requires a sequential iscol with all indices. 3976 */ 3977 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3978 { 3979 PetscErrorCode ierr; 3980 PetscMPIInt rank,size; 3981 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3982 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3983 PetscBool allcolumns, colflag; 3984 Mat M,Mreuse; 3985 MatScalar *vwork,*aa; 3986 MPI_Comm comm; 3987 Mat_SeqAIJ *aij; 3988 3989 PetscFunctionBegin; 3990 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3991 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3992 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3993 3994 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3995 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3996 if (colflag && ncol == mat->cmap->N) { 3997 allcolumns = PETSC_TRUE; 3998 } else { 3999 allcolumns = PETSC_FALSE; 4000 } 4001 if (call == MAT_REUSE_MATRIX) { 4002 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 4003 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 4004 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 4005 } else { 4006 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 4007 } 4008 4009 /* 4010 m - number of local rows 4011 n - number of columns (same on all processors) 4012 rstart - first row in new global matrix generated 4013 */ 4014 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 4015 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 4016 if (call == MAT_INITIAL_MATRIX) { 4017 aij = (Mat_SeqAIJ*)(Mreuse)->data; 4018 ii = aij->i; 4019 jj = aij->j; 4020 4021 /* 4022 Determine the number of non-zeros in the diagonal and off-diagonal 4023 portions of the matrix in order to do correct preallocation 4024 */ 4025 4026 /* first get start and end of "diagonal" columns */ 4027 if (csize == PETSC_DECIDE) { 4028 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 4029 if (mglobal == n) { /* square matrix */ 4030 nlocal = m; 4031 } else { 4032 nlocal = n/size + ((n % size) > rank); 4033 } 4034 } else { 4035 nlocal = csize; 4036 } 4037 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4038 rstart = rend - nlocal; 4039 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 4040 4041 /* next, compute all the lengths */ 4042 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 4043 olens = dlens + m; 4044 for (i=0; i<m; i++) { 4045 jend = ii[i+1] - ii[i]; 4046 olen = 0; 4047 dlen = 0; 4048 for (j=0; j<jend; j++) { 4049 if (*jj < rstart || *jj >= rend) olen++; 4050 else dlen++; 4051 jj++; 4052 } 4053 olens[i] = olen; 4054 dlens[i] = dlen; 4055 } 4056 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 4057 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 4058 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 4059 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 4060 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 4061 ierr = PetscFree(dlens);CHKERRQ(ierr); 4062 } else { 4063 PetscInt ml,nl; 4064 4065 M = *newmat; 4066 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 4067 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 4068 ierr = MatZeroEntries(M);CHKERRQ(ierr); 4069 /* 4070 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 4071 rather than the slower MatSetValues(). 4072 */ 4073 M->was_assembled = PETSC_TRUE; 4074 M->assembled = PETSC_FALSE; 4075 } 4076 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 4077 aij = (Mat_SeqAIJ*)(Mreuse)->data; 4078 ii = aij->i; 4079 jj = aij->j; 4080 aa = aij->a; 4081 for (i=0; i<m; i++) { 4082 row = rstart + i; 4083 nz = ii[i+1] - ii[i]; 4084 cwork = jj; jj += nz; 4085 vwork = aa; aa += nz; 4086 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 4087 } 4088 4089 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4090 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4091 *newmat = M; 4092 4093 /* save submatrix used in processor for next request */ 4094 if (call == MAT_INITIAL_MATRIX) { 4095 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 4096 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 4097 } 4098 PetscFunctionReturn(0); 4099 } 4100 4101 #undef __FUNCT__ 4102 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 4103 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4104 { 4105 PetscInt m,cstart, cend,j,nnz,i,d; 4106 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 4107 const PetscInt *JJ; 4108 PetscScalar *values; 4109 PetscErrorCode ierr; 4110 4111 PetscFunctionBegin; 4112 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 4113 4114 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 4115 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 4116 m = B->rmap->n; 4117 cstart = B->cmap->rstart; 4118 cend = B->cmap->rend; 4119 rstart = B->rmap->rstart; 4120 4121 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 4122 4123 #if defined(PETSC_USE_DEBUGGING) 4124 for (i=0; i<m; i++) { 4125 nnz = Ii[i+1]- Ii[i]; 4126 JJ = J + Ii[i]; 4127 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 4128 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 4129 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 4130 } 4131 #endif 4132 4133 for (i=0; i<m; i++) { 4134 nnz = Ii[i+1]- Ii[i]; 4135 JJ = J + Ii[i]; 4136 nnz_max = PetscMax(nnz_max,nnz); 4137 d = 0; 4138 for (j=0; j<nnz; j++) { 4139 if (cstart <= JJ[j] && JJ[j] < cend) d++; 4140 } 4141 d_nnz[i] = d; 4142 o_nnz[i] = nnz - d; 4143 } 4144 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 4145 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 4146 4147 if (v) values = (PetscScalar*)v; 4148 else { 4149 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 4150 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 4151 } 4152 4153 for (i=0; i<m; i++) { 4154 ii = i + rstart; 4155 nnz = Ii[i+1]- Ii[i]; 4156 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 4157 } 4158 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4159 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4160 4161 if (!v) { 4162 ierr = PetscFree(values);CHKERRQ(ierr); 4163 } 4164 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 4165 PetscFunctionReturn(0); 4166 } 4167 4168 #undef __FUNCT__ 4169 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 4170 /*@ 4171 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 4172 (the default parallel PETSc format). 4173 4174 Collective on MPI_Comm 4175 4176 Input Parameters: 4177 + B - the matrix 4178 . i - the indices into j for the start of each local row (starts with zero) 4179 . j - the column indices for each local row (starts with zero) 4180 - v - optional values in the matrix 4181 4182 Level: developer 4183 4184 Notes: 4185 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4186 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4187 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4188 4189 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4190 4191 The format which is used for the sparse matrix input, is equivalent to a 4192 row-major ordering.. i.e for the following matrix, the input data expected is 4193 as shown: 4194 4195 1 0 0 4196 2 0 3 P0 4197 ------- 4198 4 5 6 P1 4199 4200 Process0 [P0]: rows_owned=[0,1] 4201 i = {0,1,3} [size = nrow+1 = 2+1] 4202 j = {0,0,2} [size = nz = 6] 4203 v = {1,2,3} [size = nz = 6] 4204 4205 Process1 [P1]: rows_owned=[2] 4206 i = {0,3} [size = nrow+1 = 1+1] 4207 j = {0,1,2} [size = nz = 6] 4208 v = {4,5,6} [size = nz = 6] 4209 4210 .keywords: matrix, aij, compressed row, sparse, parallel 4211 4212 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 4213 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4214 @*/ 4215 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4216 { 4217 PetscErrorCode ierr; 4218 4219 PetscFunctionBegin; 4220 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4221 PetscFunctionReturn(0); 4222 } 4223 4224 #undef __FUNCT__ 4225 #define __FUNCT__ "MatMPIAIJSetPreallocation" 4226 /*@C 4227 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4228 (the default parallel PETSc format). For good matrix assembly performance 4229 the user should preallocate the matrix storage by setting the parameters 4230 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4231 performance can be increased by more than a factor of 50. 4232 4233 Collective on MPI_Comm 4234 4235 Input Parameters: 4236 + A - the matrix 4237 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4238 (same value is used for all local rows) 4239 . d_nnz - array containing the number of nonzeros in the various rows of the 4240 DIAGONAL portion of the local submatrix (possibly different for each row) 4241 or NULL, if d_nz is used to specify the nonzero structure. 4242 The size of this array is equal to the number of local rows, i.e 'm'. 4243 For matrices that will be factored, you must leave room for (and set) 4244 the diagonal entry even if it is zero. 4245 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4246 submatrix (same value is used for all local rows). 4247 - o_nnz - array containing the number of nonzeros in the various rows of the 4248 OFF-DIAGONAL portion of the local submatrix (possibly different for 4249 each row) or NULL, if o_nz is used to specify the nonzero 4250 structure. The size of this array is equal to the number 4251 of local rows, i.e 'm'. 4252 4253 If the *_nnz parameter is given then the *_nz parameter is ignored 4254 4255 The AIJ format (also called the Yale sparse matrix format or 4256 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4257 storage. The stored row and column indices begin with zero. 4258 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 4259 4260 The parallel matrix is partitioned such that the first m0 rows belong to 4261 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4262 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4263 4264 The DIAGONAL portion of the local submatrix of a processor can be defined 4265 as the submatrix which is obtained by extraction the part corresponding to 4266 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4267 first row that belongs to the processor, r2 is the last row belonging to 4268 the this processor, and c1-c2 is range of indices of the local part of a 4269 vector suitable for applying the matrix to. This is an mxn matrix. In the 4270 common case of a square matrix, the row and column ranges are the same and 4271 the DIAGONAL part is also square. The remaining portion of the local 4272 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4273 4274 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4275 4276 You can call MatGetInfo() to get information on how effective the preallocation was; 4277 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4278 You can also run with the option -info and look for messages with the string 4279 malloc in them to see if additional memory allocation was needed. 4280 4281 Example usage: 4282 4283 Consider the following 8x8 matrix with 34 non-zero values, that is 4284 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4285 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4286 as follows: 4287 4288 .vb 4289 1 2 0 | 0 3 0 | 0 4 4290 Proc0 0 5 6 | 7 0 0 | 8 0 4291 9 0 10 | 11 0 0 | 12 0 4292 ------------------------------------- 4293 13 0 14 | 15 16 17 | 0 0 4294 Proc1 0 18 0 | 19 20 21 | 0 0 4295 0 0 0 | 22 23 0 | 24 0 4296 ------------------------------------- 4297 Proc2 25 26 27 | 0 0 28 | 29 0 4298 30 0 0 | 31 32 33 | 0 34 4299 .ve 4300 4301 This can be represented as a collection of submatrices as: 4302 4303 .vb 4304 A B C 4305 D E F 4306 G H I 4307 .ve 4308 4309 Where the submatrices A,B,C are owned by proc0, D,E,F are 4310 owned by proc1, G,H,I are owned by proc2. 4311 4312 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4313 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4314 The 'M','N' parameters are 8,8, and have the same values on all procs. 4315 4316 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4317 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4318 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4319 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4320 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4321 matrix, ans [DF] as another SeqAIJ matrix. 4322 4323 When d_nz, o_nz parameters are specified, d_nz storage elements are 4324 allocated for every row of the local diagonal submatrix, and o_nz 4325 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4326 One way to choose d_nz and o_nz is to use the max nonzerors per local 4327 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4328 In this case, the values of d_nz,o_nz are: 4329 .vb 4330 proc0 : dnz = 2, o_nz = 2 4331 proc1 : dnz = 3, o_nz = 2 4332 proc2 : dnz = 1, o_nz = 4 4333 .ve 4334 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4335 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4336 for proc3. i.e we are using 12+15+10=37 storage locations to store 4337 34 values. 4338 4339 When d_nnz, o_nnz parameters are specified, the storage is specified 4340 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4341 In the above case the values for d_nnz,o_nnz are: 4342 .vb 4343 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4344 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4345 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4346 .ve 4347 Here the space allocated is sum of all the above values i.e 34, and 4348 hence pre-allocation is perfect. 4349 4350 Level: intermediate 4351 4352 .keywords: matrix, aij, compressed row, sparse, parallel 4353 4354 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4355 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4356 @*/ 4357 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4358 { 4359 PetscErrorCode ierr; 4360 4361 PetscFunctionBegin; 4362 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4363 PetscValidType(B,1); 4364 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4365 PetscFunctionReturn(0); 4366 } 4367 4368 #undef __FUNCT__ 4369 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4370 /*@ 4371 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4372 CSR format the local rows. 4373 4374 Collective on MPI_Comm 4375 4376 Input Parameters: 4377 + comm - MPI communicator 4378 . m - number of local rows (Cannot be PETSC_DECIDE) 4379 . n - This value should be the same as the local size used in creating the 4380 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4381 calculated if N is given) For square matrices n is almost always m. 4382 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4383 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4384 . i - row indices 4385 . j - column indices 4386 - a - matrix values 4387 4388 Output Parameter: 4389 . mat - the matrix 4390 4391 Level: intermediate 4392 4393 Notes: 4394 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4395 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4396 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4397 4398 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4399 4400 The format which is used for the sparse matrix input, is equivalent to a 4401 row-major ordering.. i.e for the following matrix, the input data expected is 4402 as shown: 4403 4404 1 0 0 4405 2 0 3 P0 4406 ------- 4407 4 5 6 P1 4408 4409 Process0 [P0]: rows_owned=[0,1] 4410 i = {0,1,3} [size = nrow+1 = 2+1] 4411 j = {0,0,2} [size = nz = 6] 4412 v = {1,2,3} [size = nz = 6] 4413 4414 Process1 [P1]: rows_owned=[2] 4415 i = {0,3} [size = nrow+1 = 1+1] 4416 j = {0,1,2} [size = nz = 6] 4417 v = {4,5,6} [size = nz = 6] 4418 4419 .keywords: matrix, aij, compressed row, sparse, parallel 4420 4421 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4422 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4423 @*/ 4424 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4425 { 4426 PetscErrorCode ierr; 4427 4428 PetscFunctionBegin; 4429 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4430 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4431 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4432 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4433 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4434 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4435 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4436 PetscFunctionReturn(0); 4437 } 4438 4439 #undef __FUNCT__ 4440 #define __FUNCT__ "MatCreateAIJ" 4441 /*@C 4442 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4443 (the default parallel PETSc format). For good matrix assembly performance 4444 the user should preallocate the matrix storage by setting the parameters 4445 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4446 performance can be increased by more than a factor of 50. 4447 4448 Collective on MPI_Comm 4449 4450 Input Parameters: 4451 + comm - MPI communicator 4452 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4453 This value should be the same as the local size used in creating the 4454 y vector for the matrix-vector product y = Ax. 4455 . n - This value should be the same as the local size used in creating the 4456 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4457 calculated if N is given) For square matrices n is almost always m. 4458 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4459 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4460 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4461 (same value is used for all local rows) 4462 . d_nnz - array containing the number of nonzeros in the various rows of the 4463 DIAGONAL portion of the local submatrix (possibly different for each row) 4464 or NULL, if d_nz is used to specify the nonzero structure. 4465 The size of this array is equal to the number of local rows, i.e 'm'. 4466 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4467 submatrix (same value is used for all local rows). 4468 - o_nnz - array containing the number of nonzeros in the various rows of the 4469 OFF-DIAGONAL portion of the local submatrix (possibly different for 4470 each row) or NULL, if o_nz is used to specify the nonzero 4471 structure. The size of this array is equal to the number 4472 of local rows, i.e 'm'. 4473 4474 Output Parameter: 4475 . A - the matrix 4476 4477 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4478 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4479 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4480 4481 Notes: 4482 If the *_nnz parameter is given then the *_nz parameter is ignored 4483 4484 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4485 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4486 storage requirements for this matrix. 4487 4488 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4489 processor than it must be used on all processors that share the object for 4490 that argument. 4491 4492 The user MUST specify either the local or global matrix dimensions 4493 (possibly both). 4494 4495 The parallel matrix is partitioned across processors such that the 4496 first m0 rows belong to process 0, the next m1 rows belong to 4497 process 1, the next m2 rows belong to process 2 etc.. where 4498 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4499 values corresponding to [m x N] submatrix. 4500 4501 The columns are logically partitioned with the n0 columns belonging 4502 to 0th partition, the next n1 columns belonging to the next 4503 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4504 4505 The DIAGONAL portion of the local submatrix on any given processor 4506 is the submatrix corresponding to the rows and columns m,n 4507 corresponding to the given processor. i.e diagonal matrix on 4508 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4509 etc. The remaining portion of the local submatrix [m x (N-n)] 4510 constitute the OFF-DIAGONAL portion. The example below better 4511 illustrates this concept. 4512 4513 For a square global matrix we define each processor's diagonal portion 4514 to be its local rows and the corresponding columns (a square submatrix); 4515 each processor's off-diagonal portion encompasses the remainder of the 4516 local matrix (a rectangular submatrix). 4517 4518 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4519 4520 When calling this routine with a single process communicator, a matrix of 4521 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4522 type of communicator, use the construction mechanism: 4523 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4524 4525 By default, this format uses inodes (identical nodes) when possible. 4526 We search for consecutive rows with the same nonzero structure, thereby 4527 reusing matrix information to achieve increased efficiency. 4528 4529 Options Database Keys: 4530 + -mat_no_inode - Do not use inodes 4531 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4532 - -mat_aij_oneindex - Internally use indexing starting at 1 4533 rather than 0. Note that when calling MatSetValues(), 4534 the user still MUST index entries starting at 0! 4535 4536 4537 Example usage: 4538 4539 Consider the following 8x8 matrix with 34 non-zero values, that is 4540 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4541 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4542 as follows: 4543 4544 .vb 4545 1 2 0 | 0 3 0 | 0 4 4546 Proc0 0 5 6 | 7 0 0 | 8 0 4547 9 0 10 | 11 0 0 | 12 0 4548 ------------------------------------- 4549 13 0 14 | 15 16 17 | 0 0 4550 Proc1 0 18 0 | 19 20 21 | 0 0 4551 0 0 0 | 22 23 0 | 24 0 4552 ------------------------------------- 4553 Proc2 25 26 27 | 0 0 28 | 29 0 4554 30 0 0 | 31 32 33 | 0 34 4555 .ve 4556 4557 This can be represented as a collection of submatrices as: 4558 4559 .vb 4560 A B C 4561 D E F 4562 G H I 4563 .ve 4564 4565 Where the submatrices A,B,C are owned by proc0, D,E,F are 4566 owned by proc1, G,H,I are owned by proc2. 4567 4568 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4569 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4570 The 'M','N' parameters are 8,8, and have the same values on all procs. 4571 4572 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4573 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4574 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4575 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4576 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4577 matrix, ans [DF] as another SeqAIJ matrix. 4578 4579 When d_nz, o_nz parameters are specified, d_nz storage elements are 4580 allocated for every row of the local diagonal submatrix, and o_nz 4581 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4582 One way to choose d_nz and o_nz is to use the max nonzerors per local 4583 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4584 In this case, the values of d_nz,o_nz are: 4585 .vb 4586 proc0 : dnz = 2, o_nz = 2 4587 proc1 : dnz = 3, o_nz = 2 4588 proc2 : dnz = 1, o_nz = 4 4589 .ve 4590 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4591 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4592 for proc3. i.e we are using 12+15+10=37 storage locations to store 4593 34 values. 4594 4595 When d_nnz, o_nnz parameters are specified, the storage is specified 4596 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4597 In the above case the values for d_nnz,o_nnz are: 4598 .vb 4599 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4600 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4601 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4602 .ve 4603 Here the space allocated is sum of all the above values i.e 34, and 4604 hence pre-allocation is perfect. 4605 4606 Level: intermediate 4607 4608 .keywords: matrix, aij, compressed row, sparse, parallel 4609 4610 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4611 MPIAIJ, MatCreateMPIAIJWithArrays() 4612 @*/ 4613 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4614 { 4615 PetscErrorCode ierr; 4616 PetscMPIInt size; 4617 4618 PetscFunctionBegin; 4619 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4620 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4621 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4622 if (size > 1) { 4623 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4624 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4625 } else { 4626 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4627 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4628 } 4629 PetscFunctionReturn(0); 4630 } 4631 4632 #undef __FUNCT__ 4633 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4634 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4635 { 4636 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4637 4638 PetscFunctionBegin; 4639 *Ad = a->A; 4640 *Ao = a->B; 4641 *colmap = a->garray; 4642 PetscFunctionReturn(0); 4643 } 4644 4645 #undef __FUNCT__ 4646 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4647 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4648 { 4649 PetscErrorCode ierr; 4650 PetscInt i; 4651 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4652 4653 PetscFunctionBegin; 4654 if (coloring->ctype == IS_COLORING_GLOBAL) { 4655 ISColoringValue *allcolors,*colors; 4656 ISColoring ocoloring; 4657 4658 /* set coloring for diagonal portion */ 4659 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4660 4661 /* set coloring for off-diagonal portion */ 4662 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4663 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4664 for (i=0; i<a->B->cmap->n; i++) { 4665 colors[i] = allcolors[a->garray[i]]; 4666 } 4667 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4668 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4669 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4670 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4671 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4672 ISColoringValue *colors; 4673 PetscInt *larray; 4674 ISColoring ocoloring; 4675 4676 /* set coloring for diagonal portion */ 4677 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4678 for (i=0; i<a->A->cmap->n; i++) { 4679 larray[i] = i + A->cmap->rstart; 4680 } 4681 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4682 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4683 for (i=0; i<a->A->cmap->n; i++) { 4684 colors[i] = coloring->colors[larray[i]]; 4685 } 4686 ierr = PetscFree(larray);CHKERRQ(ierr); 4687 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4688 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4689 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4690 4691 /* set coloring for off-diagonal portion */ 4692 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4693 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4694 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4695 for (i=0; i<a->B->cmap->n; i++) { 4696 colors[i] = coloring->colors[larray[i]]; 4697 } 4698 ierr = PetscFree(larray);CHKERRQ(ierr); 4699 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4700 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4701 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4702 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4703 PetscFunctionReturn(0); 4704 } 4705 4706 #undef __FUNCT__ 4707 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4708 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4709 { 4710 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4711 PetscErrorCode ierr; 4712 4713 PetscFunctionBegin; 4714 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4715 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4716 PetscFunctionReturn(0); 4717 } 4718 4719 #undef __FUNCT__ 4720 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4721 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4722 { 4723 PetscErrorCode ierr; 4724 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4725 PetscInt *indx; 4726 4727 PetscFunctionBegin; 4728 /* This routine will ONLY return MPIAIJ type matrix */ 4729 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4730 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4731 if (n == PETSC_DECIDE) { 4732 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4733 } 4734 /* Check sum(n) = N */ 4735 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4736 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4737 4738 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4739 rstart -= m; 4740 4741 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4742 for (i=0; i<m; i++) { 4743 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4744 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4745 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4746 } 4747 4748 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4749 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4750 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4751 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4752 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4753 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4754 PetscFunctionReturn(0); 4755 } 4756 4757 #undef __FUNCT__ 4758 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4759 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4760 { 4761 PetscErrorCode ierr; 4762 PetscInt m,N,i,rstart,nnz,Ii; 4763 PetscInt *indx; 4764 PetscScalar *values; 4765 4766 PetscFunctionBegin; 4767 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4768 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4769 for (i=0; i<m; i++) { 4770 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4771 Ii = i + rstart; 4772 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4773 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4774 } 4775 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4776 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4777 PetscFunctionReturn(0); 4778 } 4779 4780 #undef __FUNCT__ 4781 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4782 /*@ 4783 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4784 matrices from each processor 4785 4786 Collective on MPI_Comm 4787 4788 Input Parameters: 4789 + comm - the communicators the parallel matrix will live on 4790 . inmat - the input sequential matrices 4791 . n - number of local columns (or PETSC_DECIDE) 4792 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4793 4794 Output Parameter: 4795 . outmat - the parallel matrix generated 4796 4797 Level: advanced 4798 4799 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4800 4801 @*/ 4802 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4803 { 4804 PetscErrorCode ierr; 4805 4806 PetscFunctionBegin; 4807 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4808 if (scall == MAT_INITIAL_MATRIX) { 4809 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4810 } 4811 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4812 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4813 PetscFunctionReturn(0); 4814 } 4815 4816 #undef __FUNCT__ 4817 #define __FUNCT__ "MatFileSplit" 4818 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4819 { 4820 PetscErrorCode ierr; 4821 PetscMPIInt rank; 4822 PetscInt m,N,i,rstart,nnz; 4823 size_t len; 4824 const PetscInt *indx; 4825 PetscViewer out; 4826 char *name; 4827 Mat B; 4828 const PetscScalar *values; 4829 4830 PetscFunctionBegin; 4831 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4832 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4833 /* Should this be the type of the diagonal block of A? */ 4834 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4835 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4836 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4837 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4838 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4839 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4840 for (i=0; i<m; i++) { 4841 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4842 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4843 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4844 } 4845 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4846 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4847 4848 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4849 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4850 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4851 sprintf(name,"%s.%d",outfile,rank); 4852 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4853 ierr = PetscFree(name);CHKERRQ(ierr); 4854 ierr = MatView(B,out);CHKERRQ(ierr); 4855 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4856 ierr = MatDestroy(&B);CHKERRQ(ierr); 4857 PetscFunctionReturn(0); 4858 } 4859 4860 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4861 #undef __FUNCT__ 4862 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4863 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4864 { 4865 PetscErrorCode ierr; 4866 Mat_Merge_SeqsToMPI *merge; 4867 PetscContainer container; 4868 4869 PetscFunctionBegin; 4870 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4871 if (container) { 4872 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4873 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4874 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4875 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4876 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4877 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4878 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4879 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4880 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4881 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4882 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4883 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4884 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4885 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4886 ierr = PetscFree(merge);CHKERRQ(ierr); 4887 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4888 } 4889 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4890 PetscFunctionReturn(0); 4891 } 4892 4893 #include <../src/mat/utils/freespace.h> 4894 #include <petscbt.h> 4895 4896 #undef __FUNCT__ 4897 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4898 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4899 { 4900 PetscErrorCode ierr; 4901 MPI_Comm comm; 4902 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4903 PetscMPIInt size,rank,taga,*len_s; 4904 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4905 PetscInt proc,m; 4906 PetscInt **buf_ri,**buf_rj; 4907 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4908 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4909 MPI_Request *s_waits,*r_waits; 4910 MPI_Status *status; 4911 MatScalar *aa=a->a; 4912 MatScalar **abuf_r,*ba_i; 4913 Mat_Merge_SeqsToMPI *merge; 4914 PetscContainer container; 4915 4916 PetscFunctionBegin; 4917 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4918 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4919 4920 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4921 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4922 4923 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4924 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4925 4926 bi = merge->bi; 4927 bj = merge->bj; 4928 buf_ri = merge->buf_ri; 4929 buf_rj = merge->buf_rj; 4930 4931 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4932 owners = merge->rowmap->range; 4933 len_s = merge->len_s; 4934 4935 /* send and recv matrix values */ 4936 /*-----------------------------*/ 4937 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4938 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4939 4940 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4941 for (proc=0,k=0; proc<size; proc++) { 4942 if (!len_s[proc]) continue; 4943 i = owners[proc]; 4944 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4945 k++; 4946 } 4947 4948 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4949 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4950 ierr = PetscFree(status);CHKERRQ(ierr); 4951 4952 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4953 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4954 4955 /* insert mat values of mpimat */ 4956 /*----------------------------*/ 4957 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4958 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4959 4960 for (k=0; k<merge->nrecv; k++) { 4961 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4962 nrows = *(buf_ri_k[k]); 4963 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4964 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4965 } 4966 4967 /* set values of ba */ 4968 m = merge->rowmap->n; 4969 for (i=0; i<m; i++) { 4970 arow = owners[rank] + i; 4971 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4972 bnzi = bi[i+1] - bi[i]; 4973 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4974 4975 /* add local non-zero vals of this proc's seqmat into ba */ 4976 anzi = ai[arow+1] - ai[arow]; 4977 aj = a->j + ai[arow]; 4978 aa = a->a + ai[arow]; 4979 nextaj = 0; 4980 for (j=0; nextaj<anzi; j++) { 4981 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4982 ba_i[j] += aa[nextaj++]; 4983 } 4984 } 4985 4986 /* add received vals into ba */ 4987 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4988 /* i-th row */ 4989 if (i == *nextrow[k]) { 4990 anzi = *(nextai[k]+1) - *nextai[k]; 4991 aj = buf_rj[k] + *(nextai[k]); 4992 aa = abuf_r[k] + *(nextai[k]); 4993 nextaj = 0; 4994 for (j=0; nextaj<anzi; j++) { 4995 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4996 ba_i[j] += aa[nextaj++]; 4997 } 4998 } 4999 nextrow[k]++; nextai[k]++; 5000 } 5001 } 5002 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 5003 } 5004 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5005 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5006 5007 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 5008 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 5009 ierr = PetscFree(ba_i);CHKERRQ(ierr); 5010 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5011 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 5012 PetscFunctionReturn(0); 5013 } 5014 5015 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 5016 5017 #undef __FUNCT__ 5018 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 5019 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 5020 { 5021 PetscErrorCode ierr; 5022 Mat B_mpi; 5023 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 5024 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 5025 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 5026 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 5027 PetscInt len,proc,*dnz,*onz,bs,cbs; 5028 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 5029 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 5030 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 5031 MPI_Status *status; 5032 PetscFreeSpaceList free_space=NULL,current_space=NULL; 5033 PetscBT lnkbt; 5034 Mat_Merge_SeqsToMPI *merge; 5035 PetscContainer container; 5036 5037 PetscFunctionBegin; 5038 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5039 5040 /* make sure it is a PETSc comm */ 5041 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 5042 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5043 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5044 5045 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 5046 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 5047 5048 /* determine row ownership */ 5049 /*---------------------------------------------------------*/ 5050 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 5051 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 5052 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 5053 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 5054 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 5055 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 5056 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 5057 5058 m = merge->rowmap->n; 5059 owners = merge->rowmap->range; 5060 5061 /* determine the number of messages to send, their lengths */ 5062 /*---------------------------------------------------------*/ 5063 len_s = merge->len_s; 5064 5065 len = 0; /* length of buf_si[] */ 5066 merge->nsend = 0; 5067 for (proc=0; proc<size; proc++) { 5068 len_si[proc] = 0; 5069 if (proc == rank) { 5070 len_s[proc] = 0; 5071 } else { 5072 len_si[proc] = owners[proc+1] - owners[proc] + 1; 5073 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 5074 } 5075 if (len_s[proc]) { 5076 merge->nsend++; 5077 nrows = 0; 5078 for (i=owners[proc]; i<owners[proc+1]; i++) { 5079 if (ai[i+1] > ai[i]) nrows++; 5080 } 5081 len_si[proc] = 2*(nrows+1); 5082 len += len_si[proc]; 5083 } 5084 } 5085 5086 /* determine the number and length of messages to receive for ij-structure */ 5087 /*-------------------------------------------------------------------------*/ 5088 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 5089 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 5090 5091 /* post the Irecv of j-structure */ 5092 /*-------------------------------*/ 5093 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 5094 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 5095 5096 /* post the Isend of j-structure */ 5097 /*--------------------------------*/ 5098 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 5099 5100 for (proc=0, k=0; proc<size; proc++) { 5101 if (!len_s[proc]) continue; 5102 i = owners[proc]; 5103 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 5104 k++; 5105 } 5106 5107 /* receives and sends of j-structure are complete */ 5108 /*------------------------------------------------*/ 5109 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 5110 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 5111 5112 /* send and recv i-structure */ 5113 /*---------------------------*/ 5114 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 5115 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 5116 5117 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 5118 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 5119 for (proc=0,k=0; proc<size; proc++) { 5120 if (!len_s[proc]) continue; 5121 /* form outgoing message for i-structure: 5122 buf_si[0]: nrows to be sent 5123 [1:nrows]: row index (global) 5124 [nrows+1:2*nrows+1]: i-structure index 5125 */ 5126 /*-------------------------------------------*/ 5127 nrows = len_si[proc]/2 - 1; 5128 buf_si_i = buf_si + nrows+1; 5129 buf_si[0] = nrows; 5130 buf_si_i[0] = 0; 5131 nrows = 0; 5132 for (i=owners[proc]; i<owners[proc+1]; i++) { 5133 anzi = ai[i+1] - ai[i]; 5134 if (anzi) { 5135 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 5136 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 5137 nrows++; 5138 } 5139 } 5140 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 5141 k++; 5142 buf_si += len_si[proc]; 5143 } 5144 5145 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 5146 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 5147 5148 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 5149 for (i=0; i<merge->nrecv; i++) { 5150 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 5151 } 5152 5153 ierr = PetscFree(len_si);CHKERRQ(ierr); 5154 ierr = PetscFree(len_ri);CHKERRQ(ierr); 5155 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 5156 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 5157 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 5158 ierr = PetscFree(buf_s);CHKERRQ(ierr); 5159 ierr = PetscFree(status);CHKERRQ(ierr); 5160 5161 /* compute a local seq matrix in each processor */ 5162 /*----------------------------------------------*/ 5163 /* allocate bi array and free space for accumulating nonzero column info */ 5164 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 5165 bi[0] = 0; 5166 5167 /* create and initialize a linked list */ 5168 nlnk = N+1; 5169 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5170 5171 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 5172 len = ai[owners[rank+1]] - ai[owners[rank]]; 5173 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 5174 5175 current_space = free_space; 5176 5177 /* determine symbolic info for each local row */ 5178 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 5179 5180 for (k=0; k<merge->nrecv; k++) { 5181 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5182 nrows = *buf_ri_k[k]; 5183 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5184 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 5185 } 5186 5187 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 5188 len = 0; 5189 for (i=0; i<m; i++) { 5190 bnzi = 0; 5191 /* add local non-zero cols of this proc's seqmat into lnk */ 5192 arow = owners[rank] + i; 5193 anzi = ai[arow+1] - ai[arow]; 5194 aj = a->j + ai[arow]; 5195 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5196 bnzi += nlnk; 5197 /* add received col data into lnk */ 5198 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 5199 if (i == *nextrow[k]) { /* i-th row */ 5200 anzi = *(nextai[k]+1) - *nextai[k]; 5201 aj = buf_rj[k] + *nextai[k]; 5202 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5203 bnzi += nlnk; 5204 nextrow[k]++; nextai[k]++; 5205 } 5206 } 5207 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5208 5209 /* if free space is not available, make more free space */ 5210 if (current_space->local_remaining<bnzi) { 5211 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 5212 nspacedouble++; 5213 } 5214 /* copy data into free space, then initialize lnk */ 5215 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 5216 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 5217 5218 current_space->array += bnzi; 5219 current_space->local_used += bnzi; 5220 current_space->local_remaining -= bnzi; 5221 5222 bi[i+1] = bi[i] + bnzi; 5223 } 5224 5225 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5226 5227 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 5228 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5229 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5230 5231 /* create symbolic parallel matrix B_mpi */ 5232 /*---------------------------------------*/ 5233 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5234 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5235 if (n==PETSC_DECIDE) { 5236 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5237 } else { 5238 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5239 } 5240 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5241 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5242 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5243 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5244 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5245 5246 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5247 B_mpi->assembled = PETSC_FALSE; 5248 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5249 merge->bi = bi; 5250 merge->bj = bj; 5251 merge->buf_ri = buf_ri; 5252 merge->buf_rj = buf_rj; 5253 merge->coi = NULL; 5254 merge->coj = NULL; 5255 merge->owners_co = NULL; 5256 5257 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5258 5259 /* attach the supporting struct to B_mpi for reuse */ 5260 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5261 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5262 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5263 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5264 *mpimat = B_mpi; 5265 5266 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5267 PetscFunctionReturn(0); 5268 } 5269 5270 #undef __FUNCT__ 5271 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 5272 /*@C 5273 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 5274 matrices from each processor 5275 5276 Collective on MPI_Comm 5277 5278 Input Parameters: 5279 + comm - the communicators the parallel matrix will live on 5280 . seqmat - the input sequential matrices 5281 . m - number of local rows (or PETSC_DECIDE) 5282 . n - number of local columns (or PETSC_DECIDE) 5283 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5284 5285 Output Parameter: 5286 . mpimat - the parallel matrix generated 5287 5288 Level: advanced 5289 5290 Notes: 5291 The dimensions of the sequential matrix in each processor MUST be the same. 5292 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5293 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5294 @*/ 5295 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5296 { 5297 PetscErrorCode ierr; 5298 PetscMPIInt size; 5299 5300 PetscFunctionBegin; 5301 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5302 if (size == 1) { 5303 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5304 if (scall == MAT_INITIAL_MATRIX) { 5305 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5306 } else { 5307 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5308 } 5309 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5310 PetscFunctionReturn(0); 5311 } 5312 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5313 if (scall == MAT_INITIAL_MATRIX) { 5314 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5315 } 5316 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5317 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5318 PetscFunctionReturn(0); 5319 } 5320 5321 #undef __FUNCT__ 5322 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5323 /*@ 5324 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 5325 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5326 with MatGetSize() 5327 5328 Not Collective 5329 5330 Input Parameters: 5331 + A - the matrix 5332 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5333 5334 Output Parameter: 5335 . A_loc - the local sequential matrix generated 5336 5337 Level: developer 5338 5339 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5340 5341 @*/ 5342 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5343 { 5344 PetscErrorCode ierr; 5345 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5346 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5347 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5348 MatScalar *aa=a->a,*ba=b->a,*cam; 5349 PetscScalar *ca; 5350 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5351 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5352 PetscBool match; 5353 5354 PetscFunctionBegin; 5355 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5356 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5357 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5358 if (scall == MAT_INITIAL_MATRIX) { 5359 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5360 ci[0] = 0; 5361 for (i=0; i<am; i++) { 5362 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5363 } 5364 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5365 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5366 k = 0; 5367 for (i=0; i<am; i++) { 5368 ncols_o = bi[i+1] - bi[i]; 5369 ncols_d = ai[i+1] - ai[i]; 5370 /* off-diagonal portion of A */ 5371 for (jo=0; jo<ncols_o; jo++) { 5372 col = cmap[*bj]; 5373 if (col >= cstart) break; 5374 cj[k] = col; bj++; 5375 ca[k++] = *ba++; 5376 } 5377 /* diagonal portion of A */ 5378 for (j=0; j<ncols_d; j++) { 5379 cj[k] = cstart + *aj++; 5380 ca[k++] = *aa++; 5381 } 5382 /* off-diagonal portion of A */ 5383 for (j=jo; j<ncols_o; j++) { 5384 cj[k] = cmap[*bj++]; 5385 ca[k++] = *ba++; 5386 } 5387 } 5388 /* put together the new matrix */ 5389 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5390 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5391 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5392 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5393 mat->free_a = PETSC_TRUE; 5394 mat->free_ij = PETSC_TRUE; 5395 mat->nonew = 0; 5396 } else if (scall == MAT_REUSE_MATRIX) { 5397 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5398 ci = mat->i; cj = mat->j; cam = mat->a; 5399 for (i=0; i<am; i++) { 5400 /* off-diagonal portion of A */ 5401 ncols_o = bi[i+1] - bi[i]; 5402 for (jo=0; jo<ncols_o; jo++) { 5403 col = cmap[*bj]; 5404 if (col >= cstart) break; 5405 *cam++ = *ba++; bj++; 5406 } 5407 /* diagonal portion of A */ 5408 ncols_d = ai[i+1] - ai[i]; 5409 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5410 /* off-diagonal portion of A */ 5411 for (j=jo; j<ncols_o; j++) { 5412 *cam++ = *ba++; bj++; 5413 } 5414 } 5415 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5416 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5417 PetscFunctionReturn(0); 5418 } 5419 5420 #undef __FUNCT__ 5421 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5422 /*@C 5423 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5424 5425 Not Collective 5426 5427 Input Parameters: 5428 + A - the matrix 5429 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5430 - row, col - index sets of rows and columns to extract (or NULL) 5431 5432 Output Parameter: 5433 . A_loc - the local sequential matrix generated 5434 5435 Level: developer 5436 5437 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5438 5439 @*/ 5440 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5441 { 5442 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5443 PetscErrorCode ierr; 5444 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5445 IS isrowa,iscola; 5446 Mat *aloc; 5447 PetscBool match; 5448 5449 PetscFunctionBegin; 5450 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5451 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5452 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5453 if (!row) { 5454 start = A->rmap->rstart; end = A->rmap->rend; 5455 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5456 } else { 5457 isrowa = *row; 5458 } 5459 if (!col) { 5460 start = A->cmap->rstart; 5461 cmap = a->garray; 5462 nzA = a->A->cmap->n; 5463 nzB = a->B->cmap->n; 5464 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5465 ncols = 0; 5466 for (i=0; i<nzB; i++) { 5467 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5468 else break; 5469 } 5470 imark = i; 5471 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5472 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5473 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5474 } else { 5475 iscola = *col; 5476 } 5477 if (scall != MAT_INITIAL_MATRIX) { 5478 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5479 aloc[0] = *A_loc; 5480 } 5481 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5482 *A_loc = aloc[0]; 5483 ierr = PetscFree(aloc);CHKERRQ(ierr); 5484 if (!row) { 5485 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5486 } 5487 if (!col) { 5488 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5489 } 5490 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5491 PetscFunctionReturn(0); 5492 } 5493 5494 #undef __FUNCT__ 5495 #define __FUNCT__ "MatGetBrowsOfAcols" 5496 /*@C 5497 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5498 5499 Collective on Mat 5500 5501 Input Parameters: 5502 + A,B - the matrices in mpiaij format 5503 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5504 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5505 5506 Output Parameter: 5507 + rowb, colb - index sets of rows and columns of B to extract 5508 - B_seq - the sequential matrix generated 5509 5510 Level: developer 5511 5512 @*/ 5513 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5514 { 5515 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5516 PetscErrorCode ierr; 5517 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5518 IS isrowb,iscolb; 5519 Mat *bseq=NULL; 5520 5521 PetscFunctionBegin; 5522 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5523 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5524 } 5525 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5526 5527 if (scall == MAT_INITIAL_MATRIX) { 5528 start = A->cmap->rstart; 5529 cmap = a->garray; 5530 nzA = a->A->cmap->n; 5531 nzB = a->B->cmap->n; 5532 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5533 ncols = 0; 5534 for (i=0; i<nzB; i++) { /* row < local row index */ 5535 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5536 else break; 5537 } 5538 imark = i; 5539 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5540 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5541 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5542 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5543 } else { 5544 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5545 isrowb = *rowb; iscolb = *colb; 5546 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5547 bseq[0] = *B_seq; 5548 } 5549 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5550 *B_seq = bseq[0]; 5551 ierr = PetscFree(bseq);CHKERRQ(ierr); 5552 if (!rowb) { 5553 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5554 } else { 5555 *rowb = isrowb; 5556 } 5557 if (!colb) { 5558 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5559 } else { 5560 *colb = iscolb; 5561 } 5562 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5563 PetscFunctionReturn(0); 5564 } 5565 5566 #undef __FUNCT__ 5567 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5568 /* 5569 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5570 of the OFF-DIAGONAL portion of local A 5571 5572 Collective on Mat 5573 5574 Input Parameters: 5575 + A,B - the matrices in mpiaij format 5576 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5577 5578 Output Parameter: 5579 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5580 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5581 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5582 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5583 5584 Level: developer 5585 5586 */ 5587 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5588 { 5589 VecScatter_MPI_General *gen_to,*gen_from; 5590 PetscErrorCode ierr; 5591 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5592 Mat_SeqAIJ *b_oth; 5593 VecScatter ctx =a->Mvctx; 5594 MPI_Comm comm; 5595 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5596 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5597 PetscScalar *rvalues,*svalues; 5598 MatScalar *b_otha,*bufa,*bufA; 5599 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5600 MPI_Request *rwaits = NULL,*swaits = NULL; 5601 MPI_Status *sstatus,rstatus; 5602 PetscMPIInt jj; 5603 PetscInt *cols,sbs,rbs; 5604 PetscScalar *vals; 5605 5606 PetscFunctionBegin; 5607 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5608 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5609 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5610 } 5611 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5612 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5613 5614 gen_to = (VecScatter_MPI_General*)ctx->todata; 5615 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5616 rvalues = gen_from->values; /* holds the length of receiving row */ 5617 svalues = gen_to->values; /* holds the length of sending row */ 5618 nrecvs = gen_from->n; 5619 nsends = gen_to->n; 5620 5621 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5622 srow = gen_to->indices; /* local row index to be sent */ 5623 sstarts = gen_to->starts; 5624 sprocs = gen_to->procs; 5625 sstatus = gen_to->sstatus; 5626 sbs = gen_to->bs; 5627 rstarts = gen_from->starts; 5628 rprocs = gen_from->procs; 5629 rbs = gen_from->bs; 5630 5631 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5632 if (scall == MAT_INITIAL_MATRIX) { 5633 /* i-array */ 5634 /*---------*/ 5635 /* post receives */ 5636 for (i=0; i<nrecvs; i++) { 5637 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5638 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5639 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5640 } 5641 5642 /* pack the outgoing message */ 5643 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5644 5645 sstartsj[0] = 0; 5646 rstartsj[0] = 0; 5647 len = 0; /* total length of j or a array to be sent */ 5648 k = 0; 5649 for (i=0; i<nsends; i++) { 5650 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5651 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5652 for (j=0; j<nrows; j++) { 5653 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5654 for (l=0; l<sbs; l++) { 5655 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5656 5657 rowlen[j*sbs+l] = ncols; 5658 5659 len += ncols; 5660 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5661 } 5662 k++; 5663 } 5664 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5665 5666 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5667 } 5668 /* recvs and sends of i-array are completed */ 5669 i = nrecvs; 5670 while (i--) { 5671 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5672 } 5673 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5674 5675 /* allocate buffers for sending j and a arrays */ 5676 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5677 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5678 5679 /* create i-array of B_oth */ 5680 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5681 5682 b_othi[0] = 0; 5683 len = 0; /* total length of j or a array to be received */ 5684 k = 0; 5685 for (i=0; i<nrecvs; i++) { 5686 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5687 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5688 for (j=0; j<nrows; j++) { 5689 b_othi[k+1] = b_othi[k] + rowlen[j]; 5690 len += rowlen[j]; k++; 5691 } 5692 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5693 } 5694 5695 /* allocate space for j and a arrrays of B_oth */ 5696 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5697 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5698 5699 /* j-array */ 5700 /*---------*/ 5701 /* post receives of j-array */ 5702 for (i=0; i<nrecvs; i++) { 5703 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5704 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5705 } 5706 5707 /* pack the outgoing message j-array */ 5708 k = 0; 5709 for (i=0; i<nsends; i++) { 5710 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5711 bufJ = bufj+sstartsj[i]; 5712 for (j=0; j<nrows; j++) { 5713 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5714 for (ll=0; ll<sbs; ll++) { 5715 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5716 for (l=0; l<ncols; l++) { 5717 *bufJ++ = cols[l]; 5718 } 5719 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5720 } 5721 } 5722 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5723 } 5724 5725 /* recvs and sends of j-array are completed */ 5726 i = nrecvs; 5727 while (i--) { 5728 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5729 } 5730 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5731 } else if (scall == MAT_REUSE_MATRIX) { 5732 sstartsj = *startsj_s; 5733 rstartsj = *startsj_r; 5734 bufa = *bufa_ptr; 5735 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5736 b_otha = b_oth->a; 5737 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5738 5739 /* a-array */ 5740 /*---------*/ 5741 /* post receives of a-array */ 5742 for (i=0; i<nrecvs; i++) { 5743 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5744 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5745 } 5746 5747 /* pack the outgoing message a-array */ 5748 k = 0; 5749 for (i=0; i<nsends; i++) { 5750 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5751 bufA = bufa+sstartsj[i]; 5752 for (j=0; j<nrows; j++) { 5753 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5754 for (ll=0; ll<sbs; ll++) { 5755 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5756 for (l=0; l<ncols; l++) { 5757 *bufA++ = vals[l]; 5758 } 5759 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5760 } 5761 } 5762 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5763 } 5764 /* recvs and sends of a-array are completed */ 5765 i = nrecvs; 5766 while (i--) { 5767 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5768 } 5769 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5770 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5771 5772 if (scall == MAT_INITIAL_MATRIX) { 5773 /* put together the new matrix */ 5774 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5775 5776 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5777 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5778 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5779 b_oth->free_a = PETSC_TRUE; 5780 b_oth->free_ij = PETSC_TRUE; 5781 b_oth->nonew = 0; 5782 5783 ierr = PetscFree(bufj);CHKERRQ(ierr); 5784 if (!startsj_s || !bufa_ptr) { 5785 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5786 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5787 } else { 5788 *startsj_s = sstartsj; 5789 *startsj_r = rstartsj; 5790 *bufa_ptr = bufa; 5791 } 5792 } 5793 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5794 PetscFunctionReturn(0); 5795 } 5796 5797 #undef __FUNCT__ 5798 #define __FUNCT__ "MatGetCommunicationStructs" 5799 /*@C 5800 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5801 5802 Not Collective 5803 5804 Input Parameters: 5805 . A - The matrix in mpiaij format 5806 5807 Output Parameter: 5808 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5809 . colmap - A map from global column index to local index into lvec 5810 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5811 5812 Level: developer 5813 5814 @*/ 5815 #if defined(PETSC_USE_CTABLE) 5816 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5817 #else 5818 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5819 #endif 5820 { 5821 Mat_MPIAIJ *a; 5822 5823 PetscFunctionBegin; 5824 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5825 PetscValidPointer(lvec, 2); 5826 PetscValidPointer(colmap, 3); 5827 PetscValidPointer(multScatter, 4); 5828 a = (Mat_MPIAIJ*) A->data; 5829 if (lvec) *lvec = a->lvec; 5830 if (colmap) *colmap = a->colmap; 5831 if (multScatter) *multScatter = a->Mvctx; 5832 PetscFunctionReturn(0); 5833 } 5834 5835 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5836 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5837 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5838 5839 #undef __FUNCT__ 5840 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5841 /* 5842 Computes (B'*A')' since computing B*A directly is untenable 5843 5844 n p p 5845 ( ) ( ) ( ) 5846 m ( A ) * n ( B ) = m ( C ) 5847 ( ) ( ) ( ) 5848 5849 */ 5850 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5851 { 5852 PetscErrorCode ierr; 5853 Mat At,Bt,Ct; 5854 5855 PetscFunctionBegin; 5856 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5857 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5858 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5859 ierr = MatDestroy(&At);CHKERRQ(ierr); 5860 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5861 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5862 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5863 PetscFunctionReturn(0); 5864 } 5865 5866 #undef __FUNCT__ 5867 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5868 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5869 { 5870 PetscErrorCode ierr; 5871 PetscInt m=A->rmap->n,n=B->cmap->n; 5872 Mat Cmat; 5873 5874 PetscFunctionBegin; 5875 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5876 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5877 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5878 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5879 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5880 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5881 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5882 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5883 5884 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5885 5886 *C = Cmat; 5887 PetscFunctionReturn(0); 5888 } 5889 5890 /* ----------------------------------------------------------------*/ 5891 #undef __FUNCT__ 5892 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5893 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5894 { 5895 PetscErrorCode ierr; 5896 5897 PetscFunctionBegin; 5898 if (scall == MAT_INITIAL_MATRIX) { 5899 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5900 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5901 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5902 } 5903 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5904 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5905 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5906 PetscFunctionReturn(0); 5907 } 5908 5909 #if defined(PETSC_HAVE_MUMPS) 5910 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5911 #endif 5912 #if defined(PETSC_HAVE_PASTIX) 5913 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5914 #endif 5915 #if defined(PETSC_HAVE_SUPERLU_DIST) 5916 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5917 #endif 5918 #if defined(PETSC_HAVE_CLIQUE) 5919 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5920 #endif 5921 5922 /*MC 5923 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5924 5925 Options Database Keys: 5926 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5927 5928 Level: beginner 5929 5930 .seealso: MatCreateAIJ() 5931 M*/ 5932 5933 #undef __FUNCT__ 5934 #define __FUNCT__ "MatCreate_MPIAIJ" 5935 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5936 { 5937 Mat_MPIAIJ *b; 5938 PetscErrorCode ierr; 5939 PetscMPIInt size; 5940 5941 PetscFunctionBegin; 5942 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5943 5944 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5945 B->data = (void*)b; 5946 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5947 B->assembled = PETSC_FALSE; 5948 B->insertmode = NOT_SET_VALUES; 5949 b->size = size; 5950 5951 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5952 5953 /* build cache for off array entries formed */ 5954 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5955 5956 b->donotstash = PETSC_FALSE; 5957 b->colmap = 0; 5958 b->garray = 0; 5959 b->roworiented = PETSC_TRUE; 5960 5961 /* stuff used for matrix vector multiply */ 5962 b->lvec = NULL; 5963 b->Mvctx = NULL; 5964 5965 /* stuff for MatGetRow() */ 5966 b->rowindices = 0; 5967 b->rowvalues = 0; 5968 b->getrowactive = PETSC_FALSE; 5969 5970 /* flexible pointer used in CUSP/CUSPARSE classes */ 5971 b->spptr = NULL; 5972 5973 #if defined(PETSC_HAVE_MUMPS) 5974 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5975 #endif 5976 #if defined(PETSC_HAVE_PASTIX) 5977 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5978 #endif 5979 #if defined(PETSC_HAVE_SUPERLU_DIST) 5980 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5981 #endif 5982 #if defined(PETSC_HAVE_CLIQUE) 5983 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5984 #endif 5985 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5986 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5987 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5988 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5989 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5990 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5991 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5992 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5993 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5994 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5995 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5996 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5997 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5998 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5999 PetscFunctionReturn(0); 6000 } 6001 6002 #undef __FUNCT__ 6003 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 6004 /*@ 6005 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6006 and "off-diagonal" part of the matrix in CSR format. 6007 6008 Collective on MPI_Comm 6009 6010 Input Parameters: 6011 + comm - MPI communicator 6012 . m - number of local rows (Cannot be PETSC_DECIDE) 6013 . n - This value should be the same as the local size used in creating the 6014 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6015 calculated if N is given) For square matrices n is almost always m. 6016 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6017 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6018 . i - row indices for "diagonal" portion of matrix 6019 . j - column indices 6020 . a - matrix values 6021 . oi - row indices for "off-diagonal" portion of matrix 6022 . oj - column indices 6023 - oa - matrix values 6024 6025 Output Parameter: 6026 . mat - the matrix 6027 6028 Level: advanced 6029 6030 Notes: 6031 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6032 must free the arrays once the matrix has been destroyed and not before. 6033 6034 The i and j indices are 0 based 6035 6036 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6037 6038 This sets local rows and cannot be used to set off-processor values. 6039 6040 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6041 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6042 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6043 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6044 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6045 communication if it is known that only local entries will be set. 6046 6047 .keywords: matrix, aij, compressed row, sparse, parallel 6048 6049 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6050 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6051 @*/ 6052 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6053 { 6054 PetscErrorCode ierr; 6055 Mat_MPIAIJ *maij; 6056 6057 PetscFunctionBegin; 6058 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6059 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6060 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6061 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6062 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6063 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6064 maij = (Mat_MPIAIJ*) (*mat)->data; 6065 6066 (*mat)->preallocated = PETSC_TRUE; 6067 6068 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6069 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6070 6071 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6072 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6073 6074 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6075 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6076 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6077 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6078 6079 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6080 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6081 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6082 PetscFunctionReturn(0); 6083 } 6084 6085 /* 6086 Special version for direct calls from Fortran 6087 */ 6088 #include <petsc-private/fortranimpl.h> 6089 6090 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6091 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6092 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6093 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6094 #endif 6095 6096 /* Change these macros so can be used in void function */ 6097 #undef CHKERRQ 6098 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6099 #undef SETERRQ2 6100 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6101 #undef SETERRQ3 6102 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6103 #undef SETERRQ 6104 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6105 6106 #undef __FUNCT__ 6107 #define __FUNCT__ "matsetvaluesmpiaij_" 6108 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6109 { 6110 Mat mat = *mmat; 6111 PetscInt m = *mm, n = *mn; 6112 InsertMode addv = *maddv; 6113 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6114 PetscScalar value; 6115 PetscErrorCode ierr; 6116 6117 MatCheckPreallocated(mat,1); 6118 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6119 6120 #if defined(PETSC_USE_DEBUG) 6121 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6122 #endif 6123 { 6124 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6125 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6126 PetscBool roworiented = aij->roworiented; 6127 6128 /* Some Variables required in the macro */ 6129 Mat A = aij->A; 6130 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6131 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6132 MatScalar *aa = a->a; 6133 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6134 Mat B = aij->B; 6135 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6136 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6137 MatScalar *ba = b->a; 6138 6139 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6140 PetscInt nonew = a->nonew; 6141 MatScalar *ap1,*ap2; 6142 6143 PetscFunctionBegin; 6144 for (i=0; i<m; i++) { 6145 if (im[i] < 0) continue; 6146 #if defined(PETSC_USE_DEBUG) 6147 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6148 #endif 6149 if (im[i] >= rstart && im[i] < rend) { 6150 row = im[i] - rstart; 6151 lastcol1 = -1; 6152 rp1 = aj + ai[row]; 6153 ap1 = aa + ai[row]; 6154 rmax1 = aimax[row]; 6155 nrow1 = ailen[row]; 6156 low1 = 0; 6157 high1 = nrow1; 6158 lastcol2 = -1; 6159 rp2 = bj + bi[row]; 6160 ap2 = ba + bi[row]; 6161 rmax2 = bimax[row]; 6162 nrow2 = bilen[row]; 6163 low2 = 0; 6164 high2 = nrow2; 6165 6166 for (j=0; j<n; j++) { 6167 if (roworiented) value = v[i*n+j]; 6168 else value = v[i+j*m]; 6169 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 6170 if (in[j] >= cstart && in[j] < cend) { 6171 col = in[j] - cstart; 6172 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 6173 } else if (in[j] < 0) continue; 6174 #if defined(PETSC_USE_DEBUG) 6175 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6176 #endif 6177 else { 6178 if (mat->was_assembled) { 6179 if (!aij->colmap) { 6180 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6181 } 6182 #if defined(PETSC_USE_CTABLE) 6183 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6184 col--; 6185 #else 6186 col = aij->colmap[in[j]] - 1; 6187 #endif 6188 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6189 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6190 col = in[j]; 6191 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6192 B = aij->B; 6193 b = (Mat_SeqAIJ*)B->data; 6194 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6195 rp2 = bj + bi[row]; 6196 ap2 = ba + bi[row]; 6197 rmax2 = bimax[row]; 6198 nrow2 = bilen[row]; 6199 low2 = 0; 6200 high2 = nrow2; 6201 bm = aij->B->rmap->n; 6202 ba = b->a; 6203 } 6204 } else col = in[j]; 6205 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 6206 } 6207 } 6208 } else if (!aij->donotstash) { 6209 if (roworiented) { 6210 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6211 } else { 6212 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6213 } 6214 } 6215 } 6216 } 6217 PetscFunctionReturnVoid(); 6218 } 6219 6220